comparison mercurial/parsers.c @ 16414:e8d37b78acfb

parsers: use base-16 trie for faster node->rev mapping This greatly speeds up node->rev lookups, with results that are often user-perceptible: for instance, "hg --time log" of the node associated with rev 1000 on a linux-2.6 repo improves from 0.3 seconds to 0.03. I have not found any instances of slowdowns. The new perfnodelookup command in contrib/perf.py demonstrates the speedup more dramatically, since it performs no I/O. For a single lookup, the new code is about 40x faster. These changes also prepare the ground for the possibility of further improving the performance of prefix-based node lookups.
author Bryan O'Sullivan <bryano@fb.com>
date Thu, 12 Apr 2012 14:05:59 -0700
parents ee163a9cf37c
children d126a0d16856
comparison
equal deleted inserted replaced
16413:1a420761fcb7 16414:e8d37b78acfb
213 Py_XDECREF(parents); 213 Py_XDECREF(parents);
214 return ret; 214 return ret;
215 } 215 }
216 216
217 /* 217 /*
218 * A list-like object that decodes the contents of a RevlogNG index 218 * A base-16 trie for fast node->rev mapping.
219 * file on demand. It has limited support for insert and delete at the 219 *
220 * last element before the end. The last entry is always a sentinel 220 * Positive value is index of the next node in the trie
221 * nullid. 221 * Negative value is a leaf: -(rev + 1)
222 * Zero is empty
223 */
224 typedef struct {
225 int children[16];
226 } nodetree;
227
228 /*
229 * This class has two behaviours.
230 *
231 * When used in a list-like way (with integer keys), we decode an
232 * entry in a RevlogNG index file on demand. Our last entry is a
233 * sentinel, always a nullid. We have limited support for
234 * integer-keyed insert and delete, only at elements right before the
235 * sentinel.
236 *
237 * With string keys, we lazily perform a reverse mapping from node to
238 * rev, using a base-16 trie.
222 */ 239 */
223 typedef struct { 240 typedef struct {
224 PyObject_HEAD 241 PyObject_HEAD
225 /* Type-specific fields go here. */ 242 /* Type-specific fields go here. */
226 PyObject *data; /* raw bytes of index */ 243 PyObject *data; /* raw bytes of index */
227 PyObject **cache; /* cached tuples */ 244 PyObject **cache; /* cached tuples */
228 const char **offsets; /* populated on demand */ 245 const char **offsets; /* populated on demand */
229 Py_ssize_t raw_length; /* original number of elements */ 246 Py_ssize_t raw_length; /* original number of elements */
230 Py_ssize_t length; /* current number of elements */ 247 Py_ssize_t length; /* current number of elements */
231 PyObject *added; /* populated on demand */ 248 PyObject *added; /* populated on demand */
249 nodetree *nt; /* base-16 trie */
250 int ntlength; /* # nodes in use */
251 int ntcapacity; /* # nodes allocated */
252 int ntdepth; /* maximum depth of tree */
253 int ntsplits; /* # splits performed */
254 int ntrev; /* last rev scanned */
255 int ntlookups; /* # lookups */
256 int ntmisses; /* # lookups that miss the cache */
232 int inlined; 257 int inlined;
233 } indexObject; 258 } indexObject;
234 259
235 static Py_ssize_t index_length(indexObject *self) 260 static Py_ssize_t index_length(const indexObject *self)
236 { 261 {
237 if (self->added == NULL) 262 if (self->added == NULL)
238 return self->length; 263 return self->length;
239 return self->length + PyList_GET_SIZE(self->added); 264 return self->length + PyList_GET_SIZE(self->added);
240 } 265 }
241 266
242 static PyObject *nullentry; 267 static PyObject *nullentry;
268 static const char nullid[20];
243 269
244 static long inline_scan(indexObject *self, const char **offsets); 270 static long inline_scan(indexObject *self, const char **offsets);
245 271
246 #if LONG_MAX == 0x7fffffffL 272 #if LONG_MAX == 0x7fffffffL
247 static char *tuple_format = "Kiiiiiis#"; 273 static char *tuple_format = "Kiiiiiis#";
248 #else 274 #else
249 static char *tuple_format = "kiiiiiis#"; 275 static char *tuple_format = "kiiiiiis#";
250 #endif 276 #endif
251 277
252 /* RevlogNG format (all in big endian, data may be inlined): 278 /*
279 * Return a pointer to the beginning of a RevlogNG record.
280 */
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 {
283 if (self->inlined && pos > 0) {
284 if (self->offsets == NULL) {
285 self->offsets = malloc(self->raw_length *
286 sizeof(*self->offsets));
287 if (self->offsets == NULL)
288 return (const char *)PyErr_NoMemory();
289 inline_scan(self, self->offsets);
290 }
291 return self->offsets[pos];
292 }
293
294 return PyString_AS_STRING(self->data) + pos * 64;
295 }
296
297 /*
298 * RevlogNG format (all in big endian, data may be inlined):
253 * 6 bytes: offset 299 * 6 bytes: offset
254 * 2 bytes: flags 300 * 2 bytes: flags
255 * 4 bytes: compressed length 301 * 4 bytes: compressed length
256 * 4 bytes: uncompressed length 302 * 4 bytes: uncompressed length
257 * 4 bytes: base revision 303 * 4 bytes: base revision
268 const char *c_node_id; 314 const char *c_node_id;
269 const char *data; 315 const char *data;
270 Py_ssize_t length = index_length(self); 316 Py_ssize_t length = index_length(self);
271 PyObject *entry; 317 PyObject *entry;
272 318
273 if (pos >= length) { 319 if (pos < 0)
320 pos += length;
321
322 if (pos < 0 || pos >= length) {
274 PyErr_SetString(PyExc_IndexError, "revlog index out of range"); 323 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
275 return NULL; 324 return NULL;
276 } 325 }
277 326
278 if (pos == length - 1) { 327 if (pos == length - 1) {
296 self->cache = calloc(self->raw_length, sizeof(PyObject *)); 345 self->cache = calloc(self->raw_length, sizeof(PyObject *));
297 if (self->cache == NULL) 346 if (self->cache == NULL)
298 return PyErr_NoMemory(); 347 return PyErr_NoMemory();
299 } 348 }
300 349
301 if (self->inlined && pos > 0) { 350 data = index_deref(self, pos);
302 if (self->offsets == NULL) { 351 if (data == NULL)
303 self->offsets = malloc(self->raw_length * 352 return NULL;
304 sizeof(*self->offsets));
305 if (self->offsets == NULL)
306 return PyErr_NoMemory();
307 inline_scan(self, self->offsets);
308 }
309 data = self->offsets[pos];
310 } else
311 data = PyString_AS_STRING(self->data) + pos * 64;
312 353
313 memcpy(decode, data, 8 * sizeof(uint32_t)); 354 memcpy(decode, data, 8 * sizeof(uint32_t));
314 355
315 offset_flags = ntohl(decode[1]); 356 offset_flags = ntohl(decode[1]);
316 if (pos == 0) /* mask out version number for the first entry */ 357 if (pos == 0) /* mask out version number for the first entry */
339 Py_INCREF(entry); 380 Py_INCREF(entry);
340 381
341 return entry; 382 return entry;
342 } 383 }
343 384
385 /*
386 * Return the 20-byte SHA of the node corresponding to the given rev.
387 */
388 static const char *index_node(indexObject *self, Py_ssize_t pos)
389 {
390 Py_ssize_t length = index_length(self);
391 const char *data;
392
393 if (pos == length - 1)
394 return nullid;
395
396 if (pos >= length)
397 return NULL;
398
399 if (pos >= self->length - 1) {
400 PyObject *tuple, *str;
401 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
402 str = PyTuple_GetItem(tuple, 7);
403 return str ? PyString_AS_STRING(str) : NULL;
404 }
405
406 data = index_deref(self, pos);
407 return data ? data + 32 : NULL;
408 }
409
410 static int nt_insert(indexObject *self, const char *node, int rev);
411
412 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
413 {
414 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
415 return -1;
416 if (*nodelen == 20)
417 return 0;
418 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
419 return -1;
420 }
421
344 static PyObject *index_insert(indexObject *self, PyObject *args) 422 static PyObject *index_insert(indexObject *self, PyObject *args)
345 { 423 {
346 PyObject *obj, *node; 424 PyObject *obj;
425 char *node;
347 long offset; 426 long offset;
348 Py_ssize_t len; 427 Py_ssize_t len, nodelen;
349 428
350 if (!PyArg_ParseTuple(args, "lO", &offset, &obj)) 429 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
351 return NULL; 430 return NULL;
352 431
353 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { 432 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
354 PyErr_SetString(PyExc_ValueError, "8-tuple required"); 433 PyErr_SetString(PyExc_TypeError, "8-tuple required");
355 return NULL; 434 return NULL;
356 } 435 }
357 436
358 node = PyTuple_GET_ITEM(obj, 7); 437 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
359 if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) { 438 return NULL;
360 PyErr_SetString(PyExc_ValueError,
361 "20-byte hash required as last element");
362 return NULL;
363 }
364 439
365 len = index_length(self); 440 len = index_length(self);
366 441
367 if (offset < 0) 442 if (offset < 0)
368 offset += len; 443 offset += len;
369 444
370 if (offset != len - 1) { 445 if (offset != len - 1) {
371 PyErr_SetString(PyExc_IndexError, 446 PyErr_SetString(PyExc_IndexError,
372 "insert only supported at index -1"); 447 "insert only supported at index -1");
448 return NULL;
449 }
450
451 if (offset > INT_MAX) {
452 PyErr_SetString(PyExc_ValueError,
453 "currently only 2**31 revs supported");
373 return NULL; 454 return NULL;
374 } 455 }
375 456
376 if (self->added == NULL) { 457 if (self->added == NULL) {
377 self->added = PyList_New(0); 458 self->added = PyList_New(0);
380 } 461 }
381 462
382 if (PyList_Append(self->added, obj) == -1) 463 if (PyList_Append(self->added, obj) == -1)
383 return NULL; 464 return NULL;
384 465
466 if (self->nt)
467 nt_insert(self, node, (int)offset);
468
385 Py_RETURN_NONE; 469 Py_RETURN_NONE;
386 } 470 }
387 471
388 static void _index_clearcaches(indexObject *self) 472 static void _index_clearcaches(indexObject *self)
389 { 473 {
399 } 483 }
400 if (self->offsets) { 484 if (self->offsets) {
401 free(self->offsets); 485 free(self->offsets);
402 self->offsets = NULL; 486 self->offsets = NULL;
403 } 487 }
488 if (self->nt) {
489 free(self->nt);
490 self->nt = NULL;
491 }
404 } 492 }
405 493
406 static PyObject *index_clearcaches(indexObject *self) 494 static PyObject *index_clearcaches(indexObject *self)
407 { 495 {
408 _index_clearcaches(self); 496 _index_clearcaches(self);
497 self->ntlength = self->ntcapacity = 0;
498 self->ntdepth = self->ntsplits = 0;
499 self->ntrev = -1;
500 self->ntlookups = self->ntmisses = 0;
409 Py_RETURN_NONE; 501 Py_RETURN_NONE;
410 } 502 }
411 503
412 static int index_assign_subscript(indexObject *self, PyObject *item, 504 static PyObject *index_stats(indexObject *self)
413 PyObject *value) 505 {
506 PyObject *obj = PyDict_New();
507
508 if (obj == NULL)
509 return NULL;
510
511 #define istat(__n, __d) \
512 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
513 goto bail;
514
515 if (self->added) {
516 Py_ssize_t len = PyList_GET_SIZE(self->added);
517 if (PyDict_SetItemString(obj, "index entries added",
518 PyInt_FromLong(len)) == -1)
519 goto bail;
520 }
521
522 if (self->raw_length != self->length - 1)
523 istat(raw_length, "revs on disk");
524 istat(length, "revs in memory");
525 istat(ntcapacity, "node trie capacity");
526 istat(ntdepth, "node trie depth");
527 istat(ntlength, "node trie count");
528 istat(ntlookups, "node trie lookups");
529 istat(ntmisses, "node trie misses");
530 istat(ntrev, "node trie last rev scanned");
531 istat(ntsplits, "node trie splits");
532
533 #undef istat
534
535 return obj;
536
537 bail:
538 Py_XDECREF(obj);
539 return NULL;
540 }
541
542 static inline int nt_level(const char *node, int level)
543 {
544 int v = node[level>>1];
545 if (!(level & 1))
546 v >>= 4;
547 return v & 0xf;
548 }
549
550 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
551 {
552 int level, off;
553
554 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
555 return -1;
556
557 if (self->nt == NULL)
558 return -2;
559
560 for (level = off = 0; level < nodelen; level++) {
561 int k = nt_level(node, level);
562 nodetree *n = &self->nt[off];
563 int v = n->children[k];
564
565 if (v < 0) {
566 const char *n;
567 v = -v - 1;
568 n = index_node(self, v);
569 if (n == NULL)
570 return -2;
571 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
572 ? -2 : v;
573 }
574 if (v == 0)
575 return -2;
576 off = v;
577 }
578 return -2;
579 }
580
581 static int nt_new(indexObject *self)
582 {
583 if (self->ntlength == self->ntcapacity) {
584 self->ntcapacity *= 2;
585 self->nt = realloc(self->nt,
586 self->ntcapacity * sizeof(nodetree));
587 if (self->nt == NULL) {
588 PyErr_SetString(PyExc_MemoryError, "out of memory");
589 return -1;
590 }
591 memset(&self->nt[self->ntlength], 0,
592 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
593 }
594 return self->ntlength++;
595 }
596
597 static int nt_insert(indexObject *self, const char *node, int rev)
598 {
599 int level = 0;
600 int off = 0;
601
602 while (level < 20) {
603 int k = nt_level(node, level);
604 nodetree *n;
605 int v;
606
607 n = &self->nt[off];
608 v = n->children[k];
609
610 if (v == 0) {
611 n->children[k] = -rev - 1;
612 return 0;
613 }
614 if (v < 0) {
615 const char *oldnode = index_node(self, -v - 1);
616 int noff;
617
618 if (!oldnode || !memcmp(oldnode, node, 20)) {
619 n->children[k] = -rev - 1;
620 return 0;
621 }
622 noff = nt_new(self);
623 if (noff == -1)
624 return -1;
625 /* self->nt may have been changed by realloc */
626 self->nt[off].children[k] = noff;
627 off = noff;
628 n = &self->nt[off];
629 n->children[nt_level(oldnode, ++level)] = v;
630 if (level > self->ntdepth)
631 self->ntdepth = level;
632 self->ntsplits += 1;
633 } else {
634 level += 1;
635 off = v;
636 }
637 }
638
639 return -1;
640 }
641
642 /*
643 * Return values:
644 *
645 * -3: error (exception set)
646 * -2: not found (no exception set)
647 * rest: valid rev
648 */
649 static int index_find_node(indexObject *self,
650 const char *node, Py_ssize_t nodelen)
651 {
652 int rev;
653
654 self->ntlookups++;
655 rev = nt_find(self, node, nodelen);
656 if (rev >= -1)
657 return rev;
658
659 if (self->nt == NULL) {
660 self->ntcapacity = self->raw_length < 4
661 ? 4 : self->raw_length / 2;
662 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
663 if (self->nt == NULL) {
664 PyErr_SetString(PyExc_MemoryError, "out of memory");
665 return -3;
666 }
667 self->ntlength = 1;
668 self->ntrev = (int)index_length(self) - 1;
669 self->ntlookups = 1;
670 self->ntmisses = 0;
671 }
672
673 /*
674 * For the first handful of lookups, we scan the entire index,
675 * and cache only the matching nodes. This optimizes for cases
676 * like "hg tip", where only a few nodes are accessed.
677 *
678 * After that, we cache every node we visit, using a single
679 * scan amortized over multiple lookups. This gives the best
680 * bulk performance, e.g. for "hg log".
681 */
682 if (self->ntmisses++ < 4) {
683 for (rev = self->ntrev - 1; rev >= 0; rev--) {
684 const char *n = index_node(self, rev);
685 if (n == NULL)
686 return -2;
687 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
688 if (nt_insert(self, n, rev) == -1)
689 return -3;
690 break;
691 }
692 }
693 } else {
694 for (rev = self->ntrev - 1; rev >= 0; rev--) {
695 const char *n = index_node(self, rev);
696 if (n == NULL)
697 return -2;
698 if (nt_insert(self, n, rev) == -1)
699 return -3;
700 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
701 break;
702 }
703 }
704 self->ntrev = rev;
705 }
706
707 if (rev >= 0)
708 return rev;
709 return -2;
710 }
711
712 static PyObject *raise_revlog_error(void)
713 {
714 static PyObject *errclass;
715 PyObject *mod = NULL, *errobj;
716
717 if (errclass == NULL) {
718 PyObject *dict;
719
720 mod = PyImport_ImportModule("mercurial.error");
721 if (mod == NULL)
722 goto classfail;
723
724 dict = PyModule_GetDict(mod);
725 if (dict == NULL)
726 goto classfail;
727
728 errclass = PyDict_GetItemString(dict, "RevlogError");
729 if (errclass == NULL) {
730 PyErr_SetString(PyExc_SystemError,
731 "could not find RevlogError");
732 goto classfail;
733 }
734 Py_INCREF(errclass);
735 }
736
737 errobj = PyObject_CallFunction(errclass, NULL);
738 if (errobj == NULL)
739 return NULL;
740 PyErr_SetObject(errclass, errobj);
741 return errobj;
742
743 classfail:
744 Py_XDECREF(mod);
745 return NULL;
746 }
747
748 static PyObject *index_getitem(indexObject *self, PyObject *value)
749 {
750 char *node;
751 Py_ssize_t nodelen;
752 int rev;
753
754 if (PyInt_Check(value))
755 return index_get(self, PyInt_AS_LONG(value));
756
757 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
758 return NULL;
759 rev = index_find_node(self, node, nodelen);
760 if (rev >= -1)
761 return PyInt_FromLong(rev);
762 if (rev == -2)
763 raise_revlog_error();
764 return NULL;
765 }
766
767 static PyObject *index_m_get(indexObject *self, PyObject *args)
768 {
769 char *node;
770 int nodelen, rev;
771
772 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
773 return NULL;
774
775 rev = index_find_node(self, node, nodelen);
776 if (rev == -3)
777 return NULL;
778 if (rev == -2)
779 Py_RETURN_NONE;
780 return PyInt_FromLong(rev);
781 }
782
783 static int index_contains(indexObject *self, PyObject *value)
784 {
785 char *node;
786 Py_ssize_t nodelen;
787
788 if (PyInt_Check(value)) {
789 long rev = PyInt_AS_LONG(value);
790 return rev >= -1 && rev < index_length(self);
791 }
792
793 if (!PyString_Check(value))
794 return 0;
795
796 node = PyString_AS_STRING(value);
797 nodelen = PyString_GET_SIZE(value);
798
799 switch (index_find_node(self, node, nodelen)) {
800 case -3:
801 return -1;
802 case -2:
803 return 0;
804 default:
805 return 1;
806 }
807 }
808
809 /*
810 * Invalidate any trie entries introduced by added revs.
811 */
812 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
813 {
814 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
815
816 for (i = start; i < len; i++) {
817 PyObject *tuple = PyList_GET_ITEM(self->added, i);
818 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
819
820 nt_insert(self, PyString_AS_STRING(node), -1);
821 }
822
823 if (start == 0) {
824 Py_DECREF(self->added);
825 self->added = NULL;
826 }
827 }
828
829 /*
830 * Delete a numeric range of revs, which must be at the end of the
831 * range, but exclude the sentinel nullid entry.
832 */
833 static int index_slice_del(indexObject *self, PyObject *item)
414 { 834 {
415 Py_ssize_t start, stop, step, slicelength; 835 Py_ssize_t start, stop, step, slicelength;
416 Py_ssize_t length = index_length(self); 836 Py_ssize_t length = index_length(self);
417
418 if (!PySlice_Check(item) || value != NULL) {
419 PyErr_SetString(PyExc_TypeError,
420 "revlog index only supports slice deletion");
421 return -1;
422 }
423 837
424 if (PySlice_GetIndicesEx((PySliceObject*)item, length, 838 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
425 &start, &stop, &step, &slicelength) < 0) 839 &start, &stop, &step, &slicelength) < 0)
426 return -1; 840 return -1;
427 841
447 PyErr_SetString(PyExc_IndexError, 861 PyErr_SetString(PyExc_IndexError,
448 "revlog index deletion indices are invalid"); 862 "revlog index deletion indices are invalid");
449 return -1; 863 return -1;
450 } 864 }
451 865
452 if (start < self->length) { 866 if (start < self->length - 1) {
867 if (self->nt) {
868 Py_ssize_t i;
869
870 for (i = start + 1; i < self->length - 1; i++) {
871 const char *node = index_node(self, i);
872
873 if (node)
874 nt_insert(self, node, -1);
875 }
876 if (self->added)
877 nt_invalidate_added(self, 0);
878 if (self->ntrev > start)
879 self->ntrev = (int)start;
880 }
453 self->length = start + 1; 881 self->length = start + 1;
454 if (self->added) {
455 Py_DECREF(self->added);
456 self->added = NULL;
457 }
458 return 0; 882 return 0;
459 } 883 }
460 884
461 return PyList_SetSlice(self->added, start - self->length + 1, 885 if (self->nt) {
462 PyList_GET_SIZE(self->added), 886 nt_invalidate_added(self, start - self->length + 1);
463 NULL); 887 if (self->ntrev > start)
464 } 888 self->ntrev = (int)start;
465 889 }
890 return self->added
891 ? PyList_SetSlice(self->added, start - self->length + 1,
892 PyList_GET_SIZE(self->added), NULL)
893 : 0;
894 }
895
896 /*
897 * Supported ops:
898 *
899 * slice deletion
900 * string assignment (extend node->rev mapping)
901 * string deletion (shrink node->rev mapping)
902 */
903 static int index_assign_subscript(indexObject *self, PyObject *item,
904 PyObject *value)
905 {
906 char *node;
907 Py_ssize_t nodelen;
908 long rev;
909
910 if (PySlice_Check(item) && value == NULL)
911 return index_slice_del(self, item);
912
913 if (node_check(item, &node, &nodelen) == -1)
914 return -1;
915
916 if (value == NULL)
917 return self->nt ? nt_insert(self, node, -1) : 0;
918 rev = PyInt_AsLong(value);
919 if (rev > INT_MAX || rev < 0) {
920 if (!PyErr_Occurred())
921 PyErr_SetString(PyExc_ValueError, "rev out of range");
922 return -1;
923 }
924 return nt_insert(self, node, (int)rev);
925 }
926
927 /*
928 * Find all RevlogNG entries in an index that has inline data. Update
929 * the optional "offsets" table with those entries.
930 */
466 static long inline_scan(indexObject *self, const char **offsets) 931 static long inline_scan(indexObject *self, const char **offsets)
467 { 932 {
468 const char *data = PyString_AS_STRING(self->data); 933 const char *data = PyString_AS_STRING(self->data);
469 const char *end = data + PyString_GET_SIZE(self->data); 934 const char *end = data + PyString_GET_SIZE(self->data);
470 const long hdrsize = 64; 935 const long hdrsize = 64;
504 self->data = data_obj; 969 self->data = data_obj;
505 self->cache = NULL; 970 self->cache = NULL;
506 971
507 self->added = NULL; 972 self->added = NULL;
508 self->offsets = NULL; 973 self->offsets = NULL;
974 self->nt = NULL;
975 self->ntlength = self->ntcapacity = 0;
976 self->ntdepth = self->ntsplits = 0;
977 self->ntlookups = self->ntmisses = 0;
978 self->ntrev = -1;
509 Py_INCREF(self->data); 979 Py_INCREF(self->data);
510 980
511 if (self->inlined) { 981 if (self->inlined) {
512 long len = inline_scan(self, NULL); 982 long len = inline_scan(self, NULL);
513 if (len == -1) 983 if (len == -1)
539 1009
540 return index_real_init(self, data, size, inlined_obj, 1010 return index_real_init(self, data, size, inlined_obj,
541 PyTuple_GET_ITEM(args, 0)); 1011 PyTuple_GET_ITEM(args, 0));
542 } 1012 }
543 1013
1014 static PyObject *index_nodemap(indexObject *self)
1015 {
1016 return (PyObject *)self;
1017 }
1018
544 static void index_dealloc(indexObject *self) 1019 static void index_dealloc(indexObject *self)
545 { 1020 {
546 _index_clearcaches(self); 1021 _index_clearcaches(self);
547 Py_DECREF(self->data); 1022 Py_DECREF(self->data);
548 Py_XDECREF(self->added); 1023 Py_XDECREF(self->added);
552 static PySequenceMethods index_sequence_methods = { 1027 static PySequenceMethods index_sequence_methods = {
553 (lenfunc)index_length, /* sq_length */ 1028 (lenfunc)index_length, /* sq_length */
554 0, /* sq_concat */ 1029 0, /* sq_concat */
555 0, /* sq_repeat */ 1030 0, /* sq_repeat */
556 (ssizeargfunc)index_get, /* sq_item */ 1031 (ssizeargfunc)index_get, /* sq_item */
1032 0, /* sq_slice */
1033 0, /* sq_ass_item */
1034 0, /* sq_ass_slice */
1035 (objobjproc)index_contains, /* sq_contains */
557 }; 1036 };
558 1037
559 static PyMappingMethods index_mapping_methods = { 1038 static PyMappingMethods index_mapping_methods = {
560 (lenfunc)index_length, /* mp_length */ 1039 (lenfunc)index_length, /* mp_length */
561 NULL, /* mp_subscript */ 1040 (binaryfunc)index_getitem, /* mp_subscript */
562 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */ 1041 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
563 }; 1042 };
564 1043
565 static PyMethodDef index_methods[] = { 1044 static PyMethodDef index_methods[] = {
566 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, 1045 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
567 "clear the index caches"}, 1046 "clear the index caches"},
1047 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1048 "get an index entry"},
568 {"insert", (PyCFunction)index_insert, METH_VARARGS, 1049 {"insert", (PyCFunction)index_insert, METH_VARARGS,
569 "insert an index entry"}, 1050 "insert an index entry"},
1051 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1052 "stats for the index"},
1053 {NULL} /* Sentinel */
1054 };
1055
1056 static PyGetSetDef index_getset[] = {
1057 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
570 {NULL} /* Sentinel */ 1058 {NULL} /* Sentinel */
571 }; 1059 };
572 1060
573 static PyTypeObject indexType = { 1061 static PyTypeObject indexType = {
574 PyObject_HEAD_INIT(NULL) 1062 PyObject_HEAD_INIT(NULL)
599 0, /* tp_weaklistoffset */ 1087 0, /* tp_weaklistoffset */
600 0, /* tp_iter */ 1088 0, /* tp_iter */
601 0, /* tp_iternext */ 1089 0, /* tp_iternext */
602 index_methods, /* tp_methods */ 1090 index_methods, /* tp_methods */
603 0, /* tp_members */ 1091 0, /* tp_members */
604 0, /* tp_getset */ 1092 index_getset, /* tp_getset */
605 0, /* tp_base */ 1093 0, /* tp_base */
606 0, /* tp_dict */ 1094 0, /* tp_dict */
607 0, /* tp_descr_get */ 1095 0, /* tp_descr_get */
608 0, /* tp_descr_set */ 1096 0, /* tp_descr_set */
609 0, /* tp_dictoffset */ 1097 0, /* tp_dictoffset */
611 0, /* tp_alloc */ 1099 0, /* tp_alloc */
612 PyType_GenericNew, /* tp_new */ 1100 PyType_GenericNew, /* tp_new */
613 }; 1101 };
614 1102
615 /* 1103 /*
616 * returns a tuple of the form (index, None, cache) with elements as 1104 * returns a tuple of the form (index, index, cache) with elements as
617 * follows: 1105 * follows:
618 * 1106 *
619 * index: an index object that lazily parses the RevlogNG records 1107 * index: an index object that lazily parses RevlogNG records
620 * cache: if data is inlined, a tuple (index_file_content, 0), else None 1108 * cache: if data is inlined, a tuple (index_file_content, 0), else None
621 * 1109 *
622 * added complications are for backwards compatibility 1110 * added complications are for backwards compatibility
623 */ 1111 */
624 static PyObject *parse_index2(PyObject *self, PyObject *args) 1112 static PyObject *parse_index2(PyObject *self, PyObject *args)
649 } else { 1137 } else {
650 cache = Py_None; 1138 cache = Py_None;
651 Py_INCREF(cache); 1139 Py_INCREF(cache);
652 } 1140 }
653 1141
1142 Py_INCREF(idx);
1143
654 tuple = Py_BuildValue("NN", idx, cache); 1144 tuple = Py_BuildValue("NN", idx, cache);
655 if (!tuple) 1145 if (!tuple)
656 goto bail; 1146 goto bail;
657 return tuple; 1147 return tuple;
658 1148
672 {NULL, NULL} 1162 {NULL, NULL}
673 }; 1163 };
674 1164
675 static void module_init(PyObject *mod) 1165 static void module_init(PyObject *mod)
676 { 1166 {
677 static const char nullid[20];
678
679 if (PyType_Ready(&indexType) < 0) 1167 if (PyType_Ready(&indexType) < 0)
680 return; 1168 return;
681 Py_INCREF(&indexType); 1169 Py_INCREF(&indexType);
682 1170
683 PyModule_AddObject(mod, "index", (PyObject *)&indexType); 1171 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
708 { 1196 {
709 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc); 1197 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
710 module_init(mod); 1198 module_init(mod);
711 } 1199 }
712 #endif 1200 #endif
713