comparison mercurial/cext/revlog.c @ 32378:7d0c69505a66

cext: extract revlog/index parsing code to own C file parsers.c is ~3000 lines and ~2/3 of it is related to the revlog index type. We already have separate C source files for directory utilities and manifest parsing. I think the quite unwieldy revlog/index parsing code should be self-contained as well. I performed the extraction as a file copy then removed content from both sides in order to preserve file history and blame. As part of this, I also had to move the hexdigit table and function to a shared header since it is used by both parsers.c and revlog.c # no-check-commit
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 20 May 2017 14:01:05 -0700
parents mercurial/cext/parsers.c@df448de7cf3b
children 2e5a476b2e46
comparison
equal deleted inserted replaced
32377:c942c83ac2ec 32378:7d0c69505a66
1 /*
revlog.c - efficient revlog parsing
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
8 */
9
10 #include <Python.h>
11 #include <ctype.h>
12 #include <stddef.h>
13 #include <string.h>
14
15 #include "util.h"
16 #include "bitmanipulation.h"
17
18 #ifdef IS_PY3K
19 /* The mapping of Python types is meant to be temporary to get Python
20 * 3 to compile. We should remove this once Python 3 support is fully
21 * supported and proper types are used in the extensions themselves. */
22 #define PyInt_Check PyLong_Check
23 #define PyInt_FromLong PyLong_FromLong
24 #define PyInt_FromSsize_t PyLong_FromSsize_t
25 #define PyInt_AS_LONG PyLong_AS_LONG
26 #define PyInt_AsLong PyLong_AsLong
27 #endif
28
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Each node has 16 children, one per hex nybble of a node hash.
 * Child-slot encoding:
 *   Positive value is index of the next node in the trie
 *   Negative value is a leaf: -(rev + 1)
 *   Zero is empty
 */
typedef struct {
	int children[16]; /* one slot per hex nybble */
} nodetree;
39
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	Py_buffer buf;         /* buffer of data */
	PyObject **cache;      /* cached tuples, lazily allocated in index_get */
	const char **offsets;  /* per-rev record offsets; populated on demand
	                        * (inline revlogs only) */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* list of appended tuples; populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs;/* filtered revs set */
	nodetree *nt;          /* base-16 trie for node->rev lookups */
	unsigned ntlength;     /* # nodes in use */
	unsigned ntcapacity;   /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned into the trie */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* non-zero if data is interleaved with the index */
} indexObject;
74
75 static Py_ssize_t index_length(const indexObject *self)
76 {
77 if (self->added == NULL)
78 return self->length;
79 return self->length + PyList_GET_SIZE(self->added);
80 }
81
static PyObject *nullentry;   /* shared index tuple for the null revision */
static const char nullid[20]; /* all-zero hash of the null revision */

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

/* Py_BuildValue format for one decoded index entry. The 64-bit
 * offset+flags field needs "K" (unsigned long long) when long is
 * 32-bit, "k" (unsigned long) otherwise. */
#if LONG_MAX == 0x7fffffffL
static char *tuple_format = "Kiiiiiis#";
#else
static char *tuple_format = "kiiiiiis#";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
95
96 /*
97 * Return a pointer to the beginning of a RevlogNG record.
98 */
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * For inline revlogs, record offsets are irregular (revision data is
 * interleaved with the index), so they are located by inline_scan and
 * memoized in self->offsets on first use. Otherwise records are
 * fixed-size and the address is computed directly.
 *
 * Returns NULL with MemoryError set if the offsets table cannot be
 * allocated.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			self->offsets = PyMem_Malloc(self->raw_length *
					             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}
114
/*
 * Store the two parent revisions of rev into ps[0] and ps[1].
 *
 * Entries at or past self->length - 1 live in the in-memory "added"
 * list as Python tuples (parents at tuple positions 5 and 6); on-disk
 * entries are decoded from bytes 24-31 of the raw record.
 *
 * Returns 0 on success, -1 with ValueError set if either parent
 * exceeds maxrev.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
				    int *ps, int maxrev)
{
	if (rev >= self->length - 1) {
		PyObject *tuple = PyList_GET_ITEM(self->added,
						  rev - self->length + 1);
		ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
		ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
	} else {
		const char *data = index_deref(self, rev);
		ps[0] = getbe32(data + 24);
		ps[1] = getbe32(data + 28);
	}
	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] > maxrev || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
136
137
/*
 * RevlogNG format (all in big endian, data may be inlined):
 *    6 bytes: offset
 *    2 bytes: flags
 *    4 bytes: compressed length
 *    4 bytes: uncompressed length
 *    4 bytes: base revision
 *    4 bytes: link revision
 *    4 bytes: parent 1 revision
 *    4 bytes: parent 2 revision
 *   32 bytes: nodeid (only 20 bytes used)
 */
/*
 * Decode and return the index entry at pos as a Python tuple.
 *
 * Handles, in order: negative indexing, the trailing null sentinel,
 * in-memory "added" entries, and finally on-disk records, which are
 * decoded once and memoized in self->cache (one slot per raw record).
 * Returns a new reference, or NULL with IndexError/MemoryError set.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	if (pos < 0)
		pos += length;

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	/* The last entry is always the null sentinel. */
	if (pos == length - 1) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	/* Entries appended in memory are already Python tuples. */
	if (pos >= self->length - 1) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
		Py_INCREF(obj);
		return obj;
	}

	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
			      uncomp_len, base_rev, link_rev,
			      parent_1, parent_2, c_node_id, 20);

	if (entry) {
		PyObject_GC_UnTrack(entry);
		/* extra reference is owned by self->cache[pos] */
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}
223
/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 *
 * The returned pointer is borrowed: it aliases either the static
 * nullid, the underlying index buffer, or a bytes object held by the
 * "added" list. Returns NULL if pos is out of range or the record
 * cannot be dereferenced. pos == INT_MAX is treated as the null rev
 * (used by nt_init's sentinel insertion).
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == length - 1 || pos == INT_MAX)
		return nullid;

	if (pos >= length)
		return NULL;

	if (pos >= self->length - 1) {
		/* in-memory entry: node is tuple position 7 */
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyBytes_AS_STRING(str) : NULL;
	}

	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}
248
249 static int nt_insert(indexObject *self, const char *node, int rev);
250
251 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
252 {
253 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
254 return -1;
255 if (*nodelen == 20)
256 return 0;
257 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
258 return -1;
259 }
260
/*
 * Append an 8-tuple index entry. Insertion is only supported at the
 * position right before the null sentinel (index -1); the entry goes
 * onto the in-memory "added" list, the node trie is updated if it
 * exists, and the headrevs cache is invalidated.
 */
static PyObject *index_insert(indexObject *self, PyObject *args)
{
	PyObject *obj;
	char *node;
	int index;
	Py_ssize_t len, nodelen;

	if (!PyArg_ParseTuple(args, "iO", &index, &obj))
		return NULL;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* tuple position 7 is the 20-byte node */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
		return NULL;

	len = index_length(self);

	if (index < 0)
		index += len;

	if (index != len - 1) {
		PyErr_SetString(PyExc_IndexError,
				"insert only supported at index -1");
		return NULL;
	}

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	if (self->nt)
		nt_insert(self, node, index);

	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
305
/*
 * Release all lazily-built caches: the decoded-tuple cache, the
 * inline offsets table, the node trie and the headrevs list.
 * Does not reset the nt* statistics counters (see index_clearcaches).
 */
static void _index_clearcaches(indexObject *self)
{
	if (self->cache) {
		Py_ssize_t i;

		for (i = 0; i < self->raw_length; i++)
			Py_CLEAR(self->cache[i]);
		free(self->cache);
		self->cache = NULL;
	}
	if (self->offsets) {
		PyMem_Free(self->offsets);
		self->offsets = NULL;
	}
	if (self->nt) {
		free(self->nt);
		self->nt = NULL;
	}
	Py_CLEAR(self->headrevs);
}
326
327 static PyObject *index_clearcaches(indexObject *self)
328 {
329 _index_clearcaches(self);
330 self->ntlength = self->ntcapacity = 0;
331 self->ntdepth = self->ntsplits = 0;
332 self->ntrev = -1;
333 self->ntlookups = self->ntmisses = 0;
334 Py_RETURN_NONE;
335 }
336
/*
 * Return a dict of diagnostic counters about the index and its node
 * trie (sizes, lookup/miss counts, split statistics).
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Store one counter field under a descriptive key; jumps to bail on
 * failure before the success-path DECREF, so bail's XDECREF is safe. */
#define istat(__n, __d) \
	do { \
		t = PyInt_FromSsize_t(self->__n); \
		if (!t) \
			goto bail; \
		if (PyDict_SetItemString(obj, __d, t) == -1) \
			goto bail; \
		Py_DECREF(t); \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		t = PyInt_FromSsize_t(len);
		if (!t)
			goto bail;
		if (PyDict_SetItemString(obj, "index entries added", t) == -1)
			goto bail;
		Py_DECREF(t);
	}

	if (self->raw_length != self->length - 1)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntcapacity, "node trie capacity");
	istat(ntdepth, "node trie depth");
	istat(ntlength, "node trie count");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	istat(ntsplits, "node trie splits");

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(t);
	return NULL;
}
385
386 /*
387 * When we cache a list, we want to be sure the caller can't mutate
388 * the cached copy.
389 */
390 static PyObject *list_copy(PyObject *list)
391 {
392 Py_ssize_t len = PyList_GET_SIZE(list);
393 PyObject *newlist = PyList_New(len);
394 Py_ssize_t i;
395
396 if (newlist == NULL)
397 return NULL;
398
399 for (i = 0; i < len; i++) {
400 PyObject *obj = PyList_GET_ITEM(list, i);
401 Py_INCREF(obj);
402 PyList_SET_ITEM(newlist, i, obj);
403 }
404
405 return newlist;
406 }
407
408 static int check_filter(PyObject *filter, Py_ssize_t arg) {
409 if (filter) {
410 PyObject *arglist, *result;
411 int isfiltered;
412
413 arglist = Py_BuildValue("(n)", arg);
414 if (!arglist) {
415 return -1;
416 }
417
418 result = PyEval_CallObject(filter, arglist);
419 Py_DECREF(arglist);
420 if (!result) {
421 return -1;
422 }
423
424 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
425 * same as this function, so we can just return it directly.*/
426 isfiltered = PyObject_IsTrue(result);
427 Py_DECREF(result);
428 return isfiltered;
429 } else {
430 return 0;
431 }
432 }
433
/*
 * Mark every rev in `list` with `marker` in the phases array and
 * return the smallest rev seen (or index_length(self) + 1 if the list
 * is empty). Returns -2 on iteration error.
 *
 * NOTE(review): revs from `list` are used to index phases[] without a
 * bounds check here — presumably the caller guarantees they are in
 * range; verify against compute_phases_map_sets.
 */
static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
                                    Py_ssize_t marker, char *phases)
{
	PyObject *iter = NULL;
	PyObject *iter_item = NULL;
	Py_ssize_t min_idx = index_length(self) + 1;
	long iter_item_long;

	if (PyList_GET_SIZE(list) != 0) {
		iter = PyObject_GetIter(list);
		if (iter == NULL)
			return -2;
		while ((iter_item = PyIter_Next(iter)))
		{
			iter_item_long = PyInt_AS_LONG(iter_item);
			Py_DECREF(iter_item);
			if (iter_item_long < min_idx)
				min_idx = iter_item_long;
			phases[iter_item_long] = marker;
		}
		Py_DECREF(iter);
	}

	return min_idx;
}
459
460 static inline void set_phase_from_parents(char *phases, int parent_1,
461 int parent_2, Py_ssize_t i)
462 {
463 if (parent_1 >= 0 && phases[parent_1] > phases[i])
464 phases[i] = phases[parent_1];
465 if (parent_2 >= 0 && phases[parent_2] > phases[i])
466 phases[i] = phases[parent_2];
467 }
468
/*
 * Compute the subset of `roots` reachable from `heads` by walking
 * first parents/second parents backwards (BFS over tovisit). If
 * includepath is true, additionally return every rev on a path
 * between a reachable root and a head.
 *
 * Args (Python): minroot (long), heads (list), roots (list),
 * includepath (bool). Returns a list of revs, or NULL on error.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self) - 1;
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
	 * revstates: array of length len+1 (all revs + nullrev);
	 * both are indexed by rev + 1 so that nullrev (-1) maps to slot 0 */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
			      &PyList_Type, &roots,
			      &PyBool_Type, &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark every root rev with RS_ROOT */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* without includepath we stop descending at a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == -1)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN)
			    && parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward sweep: a rev is reachable if either parent is */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) & RS_REACHABLE)
			    && !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromLong(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
624
/*
 * Given a list of phase-root lists (one list per non-public phase),
 * compute (phaseslist, phasessetlist): a per-rev phase list and a
 * list of sets of revs per phase (slot 0 is None; public revs are the
 * complement and are not stored in a set).
 *
 * Returns NULL on error; note that a non-list `roots` argument also
 * returns NULL without setting an exception.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phaseslist = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self) - 1;
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	if (roots == NULL || !PyList_Check(roots))
		goto done;

	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots)+1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	/* slot 0 (public) has no set */
	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase-1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		/* SET_ITEM steals the reference to phaseset */
		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
		if (!PyList_Check(phaseroots))
			goto release;
		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents,
					      (int)len - 1) < 0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1], i);
		}
	}
	/* Transform phase list to a python list */
	phaseslist = PyList_New(len);
	if (phaseslist == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		PyObject *phaseval;

		phase = phases[i];
		/* We only store the sets of phase for non public phase, the public phase
		 * is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromLong(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
		phaseval = PyInt_FromLong(phase);
		if (phaseval == NULL)
			goto release;
		PyList_SET_ITEM(phaseslist, i, phaseval);
	}
	ret = PyTuple_Pack(2, phaseslist, phasessetlist);

release:
	Py_XDECREF(phaseslist);
	Py_XDECREF(phasessetlist);
done:
	free(phases);
	return ret;
}
717
718 static PyObject *index_headrevs(indexObject *self, PyObject *args)
719 {
720 Py_ssize_t i, j, len;
721 char *nothead = NULL;
722 PyObject *heads = NULL;
723 PyObject *filter = NULL;
724 PyObject *filteredrevs = Py_None;
725
726 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
727 return NULL;
728 }
729
730 if (self->headrevs && filteredrevs == self->filteredrevs)
731 return list_copy(self->headrevs);
732
733 Py_DECREF(self->filteredrevs);
734 self->filteredrevs = filteredrevs;
735 Py_INCREF(filteredrevs);
736
737 if (filteredrevs != Py_None) {
738 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
739 if (!filter) {
740 PyErr_SetString(PyExc_TypeError,
741 "filteredrevs has no attribute __contains__");
742 goto bail;
743 }
744 }
745
746 len = index_length(self) - 1;
747 heads = PyList_New(0);
748 if (heads == NULL)
749 goto bail;
750 if (len == 0) {
751 PyObject *nullid = PyInt_FromLong(-1);
752 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
753 Py_XDECREF(nullid);
754 goto bail;
755 }
756 goto done;
757 }
758
759 nothead = calloc(len, 1);
760 if (nothead == NULL) {
761 PyErr_NoMemory();
762 goto bail;
763 }
764
765 for (i = len - 1; i >= 0; i--) {
766 int isfiltered;
767 int parents[2];
768
769 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
770 * node already, and therefore this node is not filtered. So we can skip
771 * the expensive check_filter step.
772 */
773 if (nothead[i] != 1) {
774 isfiltered = check_filter(filter, i);
775 if (isfiltered == -1) {
776 PyErr_SetString(PyExc_TypeError,
777 "unable to check filter");
778 goto bail;
779 }
780
781 if (isfiltered) {
782 nothead[i] = 1;
783 continue;
784 }
785 }
786
787 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
788 goto bail;
789 for (j = 0; j < 2; j++) {
790 if (parents[j] >= 0)
791 nothead[parents[j]] = 1;
792 }
793 }
794
795 for (i = 0; i < len; i++) {
796 PyObject *head;
797
798 if (nothead[i])
799 continue;
800 head = PyInt_FromSsize_t(i);
801 if (head == NULL || PyList_Append(heads, head) == -1) {
802 Py_XDECREF(head);
803 goto bail;
804 }
805 }
806
807 done:
808 self->headrevs = heads;
809 Py_XDECREF(filter);
810 free(nothead);
811 return list_copy(self->headrevs);
812 bail:
813 Py_XDECREF(filter);
814 Py_XDECREF(heads);
815 free(nothead);
816 return NULL;
817 }
818
819 static inline int nt_level(const char *node, Py_ssize_t level)
820 {
821 int v = node[level>>1];
822 if (!(level & 1))
823 v >>= 4;
824 return v & 0xf;
825 }
826
827 /*
828 * Return values:
829 *
830 * -4: match is ambiguous (multiple candidates)
831 * -2: not found
832 * rest: valid rev
833 */
834 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
835 int hex)
836 {
837 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
838 int level, maxlevel, off;
839
840 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
841 return -1;
842
843 if (self->nt == NULL)
844 return -2;
845
846 if (hex)
847 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
848 else
849 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
850
851 for (level = off = 0; level < maxlevel; level++) {
852 int k = getnybble(node, level);
853 nodetree *n = &self->nt[off];
854 int v = n->children[k];
855
856 if (v < 0) {
857 const char *n;
858 Py_ssize_t i;
859
860 v = -(v + 1);
861 n = index_node(self, v);
862 if (n == NULL)
863 return -2;
864 for (i = level; i < maxlevel; i++)
865 if (getnybble(node, i) != nt_level(n, i))
866 return -2;
867 return v;
868 }
869 if (v == 0)
870 return -2;
871 off = v;
872 }
873 /* multiple matches against an ambiguous prefix */
874 return -4;
875 }
876
877 static int nt_new(indexObject *self)
878 {
879 if (self->ntlength == self->ntcapacity) {
880 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
881 PyErr_SetString(PyExc_MemoryError,
882 "overflow in nt_new");
883 return -1;
884 }
885 self->ntcapacity *= 2;
886 self->nt = realloc(self->nt,
887 self->ntcapacity * sizeof(nodetree));
888 if (self->nt == NULL) {
889 PyErr_SetString(PyExc_MemoryError, "out of memory");
890 return -1;
891 }
892 memset(&self->nt[self->ntlength], 0,
893 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
894 }
895 return self->ntlength++;
896 }
897
/*
 * Insert node->rev into the trie, descending one nybble per level (40
 * levels max for a 20-byte node). An occupied leaf whose node differs
 * is split: a new interior node is allocated and the old leaf pushed
 * one level down. Returns 0 on success, -1 on allocation failure.
 */
static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf */
			n->children[k] = -rev - 1;
			return 0;
		}
		if (v < 0) {
			const char *oldnode = index_node(self, -(v + 1));
			int noff;

			/* same node (or stale leaf): just overwrite */
			if (!oldnode || !memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 1;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			/* re-seat the displaced leaf one level deeper */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			level += 1;
			off = v;
		}
	}

	return -1;
}
942
/*
 * Lazily allocate the trie and seed it with the null node (stored
 * under the INT_MAX sentinel rev). ntrev starts at the top so lookups
 * can scan the index backwards on demand. Returns 0 on success, -1
 * with an exception set on overflow or allocation failure.
 */
static int nt_init(indexObject *self)
{
	if (self->nt == NULL) {
		if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
			PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
			return -1;
		}
		/* initial capacity: roughly half the revs, at least 4 */
		self->ntcapacity = self->raw_length < 4
			? 4 : (int)self->raw_length / 2;

		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
		if (self->nt == NULL) {
			PyErr_NoMemory();
			return -1;
		}
		self->ntlength = 1;
		self->ntrev = (int)index_length(self) - 1;
		self->ntlookups = 1;
		self->ntmisses = 0;
		if (nt_insert(self, nullid, INT_MAX) == -1)
			return -1;
	}
	return 0;
}
967
/*
 * Map a (possibly truncated) binary node to a rev, consulting the
 * trie first and falling back to scanning the index backwards from
 * ntrev, inserting nodes into the trie as it goes.
 *
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self,
			   const char *node, Py_ssize_t nodelen)
{
	int rev;

	self->ntlookups++;
	rev = nt_find(self, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	if (nt_init(self) == -1)
		return -3;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL)
				return -2;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(self, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL) {
				/* remember how far we got before failing */
				self->ntrev = rev + 1;
				return -2;
			}
			if (nt_insert(self, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1030
/*
 * Set a mercurial.error.RevlogError exception, falling back to
 * SystemError if the class cannot be imported or found.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	/* PyModule_GetDict returns a borrowed reference */
	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	/* borrowed reference; the dict keeps the class alive */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
				"could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
1060
/*
 * __getitem__: integer keys decode an index entry; 20-byte node keys
 * perform a reverse node->rev lookup, raising RevlogError when the
 * node is unknown.
 */
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	Py_ssize_t nodelen;
	int rev;

	if (PyInt_Check(value))
		return index_get(self, PyInt_AS_LONG(value));

	if (node_check(value, &node, &nodelen) == -1)
		return NULL;
	rev = index_find_node(self, node, nodelen);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}
1079
/*
 * Resolve a hex node prefix to a rev. The radix tree must contain
 * every node for prefix matching to be authoritative, so any not-yet-
 * scanned revs are inserted first. Returns the nt_find result codes
 * (-4 ambiguous, -3 error, -2 not found, else a rev).
 */
static int nt_partialmatch(indexObject *self, const char *node,
			   Py_ssize_t nodelen)
{
	int rev;

	if (nt_init(self) == -1)
		return -3;

	if (self->ntrev > 0) {
		/* ensure that the radix tree is fully populated */
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL)
				return -2;
			if (nt_insert(self, n, rev) == -1)
				return -3;
		}
		self->ntrev = rev;
	}

	return nt_find(self, node, nodelen, 1);
}
1102
/*
 * Python entry point for prefix lookup: takes a 4-40 character hex
 * prefix and returns the full 20-byte node, None if the prefix is
 * unknown or contains non-hex characters, or raises on ambiguity or
 * error.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
		return NULL;

	if (nodelen < 4) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit sets an exception on invalid input; probe every char */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	rev = nt_partialmatch(self, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		/* fallthrough: return NULL with the error set */
	case -3:
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node(self, rev);
	if (fullnode == NULL) {
		PyErr_Format(PyExc_IndexError,
			     "could not access rev %d", rev);
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}
1152
1153 static PyObject *index_m_get(indexObject *self, PyObject *args)
1154 {
1155 Py_ssize_t nodelen;
1156 PyObject *val;
1157 char *node;
1158 int rev;
1159
1160 if (!PyArg_ParseTuple(args, "O", &val))
1161 return NULL;
1162 if (node_check(val, &node, &nodelen) == -1)
1163 return NULL;
1164 rev = index_find_node(self, node, nodelen);
1165 if (rev == -3)
1166 return NULL;
1167 if (rev == -2)
1168 Py_RETURN_NONE;
1169 return PyInt_FromLong(rev);
1170 }
1171
1172 static int index_contains(indexObject *self, PyObject *value)
1173 {
1174 char *node;
1175 Py_ssize_t nodelen;
1176
1177 if (PyInt_Check(value)) {
1178 long rev = PyInt_AS_LONG(value);
1179 return rev >= -1 && rev < index_length(self);
1180 }
1181
1182 if (node_check(value, &node, &nodelen) == -1)
1183 return -1;
1184
1185 switch (index_find_node(self, node, nodelen)) {
1186 case -3:
1187 return -1;
1188 case -2:
1189 return 0;
1190 default:
1191 return 1;
1192 }
1193 }
1194
1195 typedef uint64_t bitmask;
1196
1197 /*
1198 * Given a disjoint set of revs, return all candidates for the
1199 * greatest common ancestor. In revset notation, this is the set
1200 * "heads(::a and ::b and ...)"
1201 */
1202 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1203 int revcount)
1204 {
1205 const bitmask allseen = (1ull << revcount) - 1;
1206 const bitmask poison = 1ull << revcount;
1207 PyObject *gca = PyList_New(0);
1208 int i, v, interesting;
1209 int maxrev = -1;
1210 bitmask sp;
1211 bitmask *seen;
1212
1213 if (gca == NULL)
1214 return PyErr_NoMemory();
1215
1216 for (i = 0; i < revcount; i++) {
1217 if (revs[i] > maxrev)
1218 maxrev = revs[i];
1219 }
1220
1221 seen = calloc(sizeof(*seen), maxrev + 1);
1222 if (seen == NULL) {
1223 Py_DECREF(gca);
1224 return PyErr_NoMemory();
1225 }
1226
1227 for (i = 0; i < revcount; i++)
1228 seen[revs[i]] = 1ull << i;
1229
1230 interesting = revcount;
1231
1232 for (v = maxrev; v >= 0 && interesting; v--) {
1233 bitmask sv = seen[v];
1234 int parents[2];
1235
1236 if (!sv)
1237 continue;
1238
1239 if (sv < poison) {
1240 interesting -= 1;
1241 if (sv == allseen) {
1242 PyObject *obj = PyInt_FromLong(v);
1243 if (obj == NULL)
1244 goto bail;
1245 if (PyList_Append(gca, obj) == -1) {
1246 Py_DECREF(obj);
1247 goto bail;
1248 }
1249 sv |= poison;
1250 for (i = 0; i < revcount; i++) {
1251 if (revs[i] == v)
1252 goto done;
1253 }
1254 }
1255 }
1256 if (index_get_parents(self, v, parents, maxrev) < 0)
1257 goto bail;
1258
1259 for (i = 0; i < 2; i++) {
1260 int p = parents[i];
1261 if (p == -1)
1262 continue;
1263 sp = seen[p];
1264 if (sv < poison) {
1265 if (sp == 0) {
1266 seen[p] = sv;
1267 interesting++;
1268 }
1269 else if (sp != sv)
1270 seen[p] |= sv;
1271 } else {
1272 if (sp && sp < poison)
1273 interesting--;
1274 seen[p] = sv;
1275 }
1276 }
1277 }
1278
1279 done:
1280 free(seen);
1281 return gca;
1282 bail:
1283 free(seen);
1284 Py_XDECREF(gca);
1285 return NULL;
1286 }
1287
1288 /*
1289 * Given a disjoint set of revs, return the subset with the longest
1290 * path to the root.
1291 */
1292 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1293 {
1294 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1295 static const Py_ssize_t capacity = 24;
1296 int *depth, *interesting = NULL;
1297 int i, j, v, ninteresting;
1298 PyObject *dict = NULL, *keys = NULL;
1299 long *seen = NULL;
1300 int maxrev = -1;
1301 long final;
1302
1303 if (revcount > capacity) {
1304 PyErr_Format(PyExc_OverflowError,
1305 "bitset size (%ld) > capacity (%ld)",
1306 (long)revcount, (long)capacity);
1307 return NULL;
1308 }
1309
1310 for (i = 0; i < revcount; i++) {
1311 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1312 if (n > maxrev)
1313 maxrev = n;
1314 }
1315
1316 depth = calloc(sizeof(*depth), maxrev + 1);
1317 if (depth == NULL)
1318 return PyErr_NoMemory();
1319
1320 seen = calloc(sizeof(*seen), maxrev + 1);
1321 if (seen == NULL) {
1322 PyErr_NoMemory();
1323 goto bail;
1324 }
1325
1326 interesting = calloc(sizeof(*interesting), 2 << revcount);
1327 if (interesting == NULL) {
1328 PyErr_NoMemory();
1329 goto bail;
1330 }
1331
1332 if (PyList_Sort(revs) == -1)
1333 goto bail;
1334
1335 for (i = 0; i < revcount; i++) {
1336 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1337 long b = 1l << i;
1338 depth[n] = 1;
1339 seen[n] = b;
1340 interesting[b] = 1;
1341 }
1342
1343 ninteresting = (int)revcount;
1344
1345 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1346 int dv = depth[v];
1347 int parents[2];
1348 long sv;
1349
1350 if (dv == 0)
1351 continue;
1352
1353 sv = seen[v];
1354 if (index_get_parents(self, v, parents, maxrev) < 0)
1355 goto bail;
1356
1357 for (i = 0; i < 2; i++) {
1358 int p = parents[i];
1359 long sp;
1360 int dp;
1361
1362 if (p == -1)
1363 continue;
1364
1365 dp = depth[p];
1366 sp = seen[p];
1367 if (dp <= dv) {
1368 depth[p] = dv + 1;
1369 if (sp != sv) {
1370 interesting[sv] += 1;
1371 seen[p] = sv;
1372 if (sp) {
1373 interesting[sp] -= 1;
1374 if (interesting[sp] == 0)
1375 ninteresting -= 1;
1376 }
1377 }
1378 }
1379 else if (dv == dp - 1) {
1380 long nsp = sp | sv;
1381 if (nsp == sp)
1382 continue;
1383 seen[p] = nsp;
1384 interesting[sp] -= 1;
1385 if (interesting[sp] == 0 && interesting[nsp] > 0)
1386 ninteresting -= 1;
1387 interesting[nsp] += 1;
1388 }
1389 }
1390 interesting[sv] -= 1;
1391 if (interesting[sv] == 0)
1392 ninteresting -= 1;
1393 }
1394
1395 final = 0;
1396 j = ninteresting;
1397 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1398 if (interesting[i] == 0)
1399 continue;
1400 final |= i;
1401 j -= 1;
1402 }
1403 if (final == 0) {
1404 keys = PyList_New(0);
1405 goto bail;
1406 }
1407
1408 dict = PyDict_New();
1409 if (dict == NULL)
1410 goto bail;
1411
1412 for (i = 0; i < revcount; i++) {
1413 PyObject *key;
1414
1415 if ((final & (1 << i)) == 0)
1416 continue;
1417
1418 key = PyList_GET_ITEM(revs, i);
1419 Py_INCREF(key);
1420 Py_INCREF(Py_None);
1421 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1422 Py_DECREF(key);
1423 Py_DECREF(Py_None);
1424 goto bail;
1425 }
1426 }
1427
1428 keys = PyDict_Keys(dict);
1429
1430 bail:
1431 free(depth);
1432 free(seen);
1433 free(interesting);
1434 Py_XDECREF(dict);
1435
1436 return keys;
1437 }
1438
1439 /*
1440 * Given a (possibly overlapping) set of revs, return all the
1441 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1442 */
1443 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1444 {
1445 PyObject *ret = NULL;
1446 Py_ssize_t argcount, i, len;
1447 bitmask repeat = 0;
1448 int revcount = 0;
1449 int *revs;
1450
1451 argcount = PySequence_Length(args);
1452 revs = PyMem_Malloc(argcount * sizeof(*revs));
1453 if (argcount > 0 && revs == NULL)
1454 return PyErr_NoMemory();
1455 len = index_length(self) - 1;
1456
1457 for (i = 0; i < argcount; i++) {
1458 static const int capacity = 24;
1459 PyObject *obj = PySequence_GetItem(args, i);
1460 bitmask x;
1461 long val;
1462
1463 if (!PyInt_Check(obj)) {
1464 PyErr_SetString(PyExc_TypeError,
1465 "arguments must all be ints");
1466 Py_DECREF(obj);
1467 goto bail;
1468 }
1469 val = PyInt_AsLong(obj);
1470 Py_DECREF(obj);
1471 if (val == -1) {
1472 ret = PyList_New(0);
1473 goto done;
1474 }
1475 if (val < 0 || val >= len) {
1476 PyErr_SetString(PyExc_IndexError,
1477 "index out of range");
1478 goto bail;
1479 }
1480 /* this cheesy bloom filter lets us avoid some more
1481 * expensive duplicate checks in the common set-is-disjoint
1482 * case */
1483 x = 1ull << (val & 0x3f);
1484 if (repeat & x) {
1485 int k;
1486 for (k = 0; k < revcount; k++) {
1487 if (val == revs[k])
1488 goto duplicate;
1489 }
1490 }
1491 else repeat |= x;
1492 if (revcount >= capacity) {
1493 PyErr_Format(PyExc_OverflowError,
1494 "bitset size (%d) > capacity (%d)",
1495 revcount, capacity);
1496 goto bail;
1497 }
1498 revs[revcount++] = (int)val;
1499 duplicate:;
1500 }
1501
1502 if (revcount == 0) {
1503 ret = PyList_New(0);
1504 goto done;
1505 }
1506 if (revcount == 1) {
1507 PyObject *obj;
1508 ret = PyList_New(1);
1509 if (ret == NULL)
1510 goto bail;
1511 obj = PyInt_FromLong(revs[0]);
1512 if (obj == NULL)
1513 goto bail;
1514 PyList_SET_ITEM(ret, 0, obj);
1515 goto done;
1516 }
1517
1518 ret = find_gca_candidates(self, revs, revcount);
1519 if (ret == NULL)
1520 goto bail;
1521
1522 done:
1523 PyMem_Free(revs);
1524 return ret;
1525
1526 bail:
1527 PyMem_Free(revs);
1528 Py_XDECREF(ret);
1529 return NULL;
1530 }
1531
1532 /*
1533 * Given a (possibly overlapping) set of revs, return the greatest
1534 * common ancestors: those with the longest path to the root.
1535 */
1536 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1537 {
1538 PyObject *ret;
1539 PyObject *gca = index_commonancestorsheads(self, args);
1540 if (gca == NULL)
1541 return NULL;
1542
1543 if (PyList_GET_SIZE(gca) <= 1) {
1544 return gca;
1545 }
1546
1547 ret = find_deepest(self, gca);
1548 Py_DECREF(gca);
1549 return ret;
1550 }
1551
1552 /*
1553 * Invalidate any trie entries introduced by added revs.
1554 */
1555 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1556 {
1557 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1558
1559 for (i = start; i < len; i++) {
1560 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1561 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1562
1563 nt_insert(self, PyBytes_AS_STRING(node), -1);
1564 }
1565
1566 if (start == 0)
1567 Py_CLEAR(self->added);
1568 }
1569
1570 /*
1571 * Delete a numeric range of revs, which must be at the end of the
1572 * range, but exclude the sentinel nullid entry.
1573 */
1574 static int index_slice_del(indexObject *self, PyObject *item)
1575 {
1576 Py_ssize_t start, stop, step, slicelength;
1577 Py_ssize_t length = index_length(self);
1578 int ret = 0;
1579
1580 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1581 #ifdef IS_PY3K
1582 if (PySlice_GetIndicesEx(item, length,
1583 #else
1584 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1585 #endif
1586 &start, &stop, &step, &slicelength) < 0)
1587 return -1;
1588
1589 if (slicelength <= 0)
1590 return 0;
1591
1592 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1593 stop = start;
1594
1595 if (step < 0) {
1596 stop = start + 1;
1597 start = stop + step*(slicelength - 1) - 1;
1598 step = -step;
1599 }
1600
1601 if (step != 1) {
1602 PyErr_SetString(PyExc_ValueError,
1603 "revlog index delete requires step size of 1");
1604 return -1;
1605 }
1606
1607 if (stop != length - 1) {
1608 PyErr_SetString(PyExc_IndexError,
1609 "revlog index deletion indices are invalid");
1610 return -1;
1611 }
1612
1613 if (start < self->length - 1) {
1614 if (self->nt) {
1615 Py_ssize_t i;
1616
1617 for (i = start + 1; i < self->length - 1; i++) {
1618 const char *node = index_node(self, i);
1619
1620 if (node)
1621 nt_insert(self, node, -1);
1622 }
1623 if (self->added)
1624 nt_invalidate_added(self, 0);
1625 if (self->ntrev > start)
1626 self->ntrev = (int)start;
1627 }
1628 self->length = start + 1;
1629 if (start < self->raw_length) {
1630 if (self->cache) {
1631 Py_ssize_t i;
1632 for (i = start; i < self->raw_length; i++)
1633 Py_CLEAR(self->cache[i]);
1634 }
1635 self->raw_length = start;
1636 }
1637 goto done;
1638 }
1639
1640 if (self->nt) {
1641 nt_invalidate_added(self, start - self->length + 1);
1642 if (self->ntrev > start)
1643 self->ntrev = (int)start;
1644 }
1645 if (self->added)
1646 ret = PyList_SetSlice(self->added, start - self->length + 1,
1647 PyList_GET_SIZE(self->added), NULL);
1648 done:
1649 Py_CLEAR(self->headrevs);
1650 return ret;
1651 }
1652
1653 /*
1654 * Supported ops:
1655 *
1656 * slice deletion
1657 * string assignment (extend node->rev mapping)
1658 * string deletion (shrink node->rev mapping)
1659 */
1660 static int index_assign_subscript(indexObject *self, PyObject *item,
1661 PyObject *value)
1662 {
1663 char *node;
1664 Py_ssize_t nodelen;
1665 long rev;
1666
1667 if (PySlice_Check(item) && value == NULL)
1668 return index_slice_del(self, item);
1669
1670 if (node_check(item, &node, &nodelen) == -1)
1671 return -1;
1672
1673 if (value == NULL)
1674 return self->nt ? nt_insert(self, node, -1) : 0;
1675 rev = PyInt_AsLong(value);
1676 if (rev > INT_MAX || rev < 0) {
1677 if (!PyErr_Occurred())
1678 PyErr_SetString(PyExc_ValueError, "rev out of range");
1679 return -1;
1680 }
1681
1682 if (nt_init(self) == -1)
1683 return -1;
1684 return nt_insert(self, node, (int)rev);
1685 }
1686
1687 /*
1688 * Find all RevlogNG entries in an index that has inline data. Update
1689 * the optional "offsets" table with those entries.
1690 */
1691 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1692 {
1693 const char *data = (const char *)self->buf.buf;
1694 Py_ssize_t pos = 0;
1695 Py_ssize_t end = self->buf.len;
1696 long incr = v1_hdrsize;
1697 Py_ssize_t len = 0;
1698
1699 while (pos + v1_hdrsize <= end && pos >= 0) {
1700 uint32_t comp_len;
1701 /* 3rd element of header is length of compressed inline data */
1702 comp_len = getbe32(data + pos + 8);
1703 incr = v1_hdrsize + comp_len;
1704 if (offsets)
1705 offsets[len] = data + pos;
1706 len++;
1707 pos += incr;
1708 }
1709
1710 if (pos != end) {
1711 if (!PyErr_Occurred())
1712 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1713 return -1;
1714 }
1715
1716 return len;
1717 }
1718
/*
 * tp_init: parse the (data, inlined) argument pair and size the index.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash. */
	/* NOTE(review): self->length is not among the fields initialized
	 * here; presumably index_dealloc()/_index_clearcaches() never read
	 * it on the early-failure paths -- confirm. */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->nt = NULL;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
				"data does not support buffer interface");
		return -1;
	}

	/* borrow the raw index bytes for the object's lifetime */
	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	/* node trie is built lazily; ntrev == -1 means "nothing scanned" */
	self->ntlength = self->ntcapacity = 0;
	self->ntdepth = self->ntsplits = 0;
	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* entries are variable-length; walk the data to count them */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len + 1;
	} else {
		/* fixed-size records: length falls out of the buffer size */
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		/* +1 for the nullid sentinel entry */
		self->length = self->raw_length + 1;
	}

	return 0;
bail:
	/* partial state is released by index_dealloc() */
	return -1;
}
1776
1777 static PyObject *index_nodemap(indexObject *self)
1778 {
1779 Py_INCREF(self);
1780 return (PyObject *)self;
1781 }
1782
/*
 * tp_dealloc: release caches, the borrowed buffer, and owned references.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	/* release the buffer view before dropping the data object that
	 * backs it */
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
1795
/* Sequence protocol: len(index), index[rev], "node in index". */
static PySequenceMethods index_sequence_methods = {
	(lenfunc)index_length,	   /* sq_length */
	0,			   /* sq_concat */
	0,			   /* sq_repeat */
	(ssizeargfunc)index_get,   /* sq_item */
	0,			   /* sq_slice */
	0,			   /* sq_ass_item */
	0,			   /* sq_ass_slice */
	(objobjproc)index_contains, /* sq_contains */
};
1806
/* Mapping protocol: node->rev lookup, slice deletion, nodemap updates. */
static PyMappingMethods index_mapping_methods = {
	(lenfunc)index_length,		 /* mp_length */
	(binaryfunc)index_getitem,	 /* mp_subscript */
	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
1812
/* Methods exposed on parsers.index instances. */
static PyMethodDef index_methods[] = {
	{"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
	 "return the gca set of the given revs"},
	{"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
	  METH_VARARGS,
	  "return the heads of the common ancestors of the given revs"},
	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
	 "clear the index caches"},
	{"get", (PyCFunction)index_m_get, METH_VARARGS,
	 "get an index entry"},
	{"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
			METH_VARARGS, "compute phases"},
	{"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
		"reachableroots"},
	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
	 "get head revisions"}, /* Can do filtering since 3.2 */
	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
	 "get filtered head revisions"}, /* Can always do filtering */
	{"insert", (PyCFunction)index_insert, METH_VARARGS,
	 "insert an index entry"},
	{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
	 "match a potentially ambiguous node ID"},
	{"stats", (PyCFunction)index_stats, METH_NOARGS,
	 "stats for the index"},
	{NULL} /* Sentinel */
};
1839
/* Attribute descriptors: "nodemap" is the index itself (see index_nodemap). */
static PyGetSetDef index_getset[] = {
	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
	{NULL} /* Sentinel */
};
1844
/*
 * The parsers.index type. tp_new is assigned in revlog_module_init();
 * unlisted trailing slots are implicitly zero.
 */
static PyTypeObject indexType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"parsers.index",           /* tp_name */
	sizeof(indexObject),       /* tp_basicsize */
	0,                         /* tp_itemsize */
	(destructor)index_dealloc, /* tp_dealloc */
	0,                         /* tp_print */
	0,                         /* tp_getattr */
	0,                         /* tp_setattr */
	0,                         /* tp_compare */
	0,                         /* tp_repr */
	0,                         /* tp_as_number */
	&index_sequence_methods,   /* tp_as_sequence */
	&index_mapping_methods,    /* tp_as_mapping */
	0,                         /* tp_hash */
	0,                         /* tp_call */
	0,                         /* tp_str */
	0,                         /* tp_getattro */
	0,                         /* tp_setattro */
	0,                         /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,        /* tp_flags */
	"revlog index",            /* tp_doc */
	0,                         /* tp_traverse */
	0,                         /* tp_clear */
	0,                         /* tp_richcompare */
	0,                         /* tp_weaklistoffset */
	0,                         /* tp_iter */
	0,                         /* tp_iternext */
	index_methods,             /* tp_methods */
	0,                         /* tp_members */
	index_getset,              /* tp_getset */
	0,                         /* tp_base */
	0,                         /* tp_dict */
	0,                         /* tp_descr_get */
	0,                         /* tp_descr_set */
	0,                         /* tp_dictoffset */
	(initproc)index_init,      /* tp_init */
	0,                         /* tp_alloc */
};
1884
1885 /*
1886 * returns a tuple of the form (index, index, cache) with elements as
1887 * follows:
1888 *
1889 * index: an index object that lazily parses RevlogNG records
1890 * cache: if data is inlined, a tuple (0, index_file_content), else None
1891 * index_file_content could be a string, or a buffer
1892 *
1893 * added complications are for backwards compatibility
1894 */
1895 PyObject *parse_index2(PyObject *self, PyObject *args)
1896 {
1897 PyObject *tuple = NULL, *cache = NULL;
1898 indexObject *idx;
1899 int ret;
1900
1901 idx = PyObject_New(indexObject, &indexType);
1902 if (idx == NULL)
1903 goto bail;
1904
1905 ret = index_init(idx, args);
1906 if (ret == -1)
1907 goto bail;
1908
1909 if (idx->inlined) {
1910 cache = Py_BuildValue("iO", 0, idx->data);
1911 if (cache == NULL)
1912 goto bail;
1913 } else {
1914 cache = Py_None;
1915 Py_INCREF(cache);
1916 }
1917
1918 tuple = Py_BuildValue("NN", idx, cache);
1919 if (!tuple)
1920 goto bail;
1921 return tuple;
1922
1923 bail:
1924 Py_XDECREF(idx);
1925 Py_XDECREF(cache);
1926 Py_XDECREF(tuple);
1927 return NULL;
1928 }
1929
/*
 * Register the index type on the extension module and build the shared
 * nullid sentinel entry.
 */
void revlog_module_init(PyObject *mod)
{
	indexType.tp_new = PyType_GenericNew;
	/* NOTE(review): failures here (and of Py_BuildValue below) are
	 * silently swallowed; the module is left partially initialized --
	 * confirm importers tolerate a missing "index" attribute. */
	if (PyType_Ready(&indexType) < 0 ||
	    PyType_Ready(&dirstateTupleType) < 0)
		return;
	Py_INCREF(&indexType);
	PyModule_AddObject(mod, "index", (PyObject *)&indexType);

	/* index entry returned for the nullid revision */
	nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
				  -1, -1, -1, -1, nullid, 20);
	if (nullentry)
		PyObject_GC_UnTrack(nullentry);
}