Mercurial > hg
comparison mercurial/cext/revlog.c @ 32378:7d0c69505a66
cext: extract revlog/index parsing code to own C file
parsers.c is ~3000 lines and ~2/3 of it is related to the revlog
index type.
We already have separate C source files for directory utilities
and manifest parsing. I think the quite unwieldy revlog/index
parsing code should be self-contained as well.
I performed the extraction as a file copy then removed content
from both sides in order to preserve file history and blame.
As part of this, I also had to move the hexdigit table and
function to a shared header since it is used by both parsers.c
and revlog.c.
# no-check-commit
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 20 May 2017 14:01:05 -0700 |
parents | mercurial/cext/parsers.c@df448de7cf3b |
children | 2e5a476b2e46 |
comparison
equal
deleted
inserted
replaced
32377:c942c83ac2ec | 32378:7d0c69505a66 |
---|---|
1 /* | |
2 parsers.c - efficient content parsing | |
3 | |
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others | |
5 | |
6 This software may be used and distributed according to the terms of | |
7 the GNU General Public License, incorporated herein by reference. | |
8 */ | |
9 | |
10 #include <Python.h> | |
11 #include <ctype.h> | |
12 #include <stddef.h> | |
13 #include <string.h> | |
14 | |
15 #include "util.h" | |
16 #include "bitmanipulation.h" | |
17 | |
#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves.
 * Note these are macros, so they alias the Python 2 int API names to
 * the Python 3 long API throughout this file. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AS_LONG PyLong_AS_LONG
#define PyInt_AsLong PyLong_AsLong
#endif
28 | |
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	/* one slot per hex nybble of the node hash; see encoding above */
	int children[16];
} nodetree;
39 | |
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	Py_buffer buf;         /* buffer of data */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs;/* filtered revs set */
	nodetree *nt;          /* base-16 trie */
	unsigned ntlength;     /* # nodes in use */
	unsigned ntcapacity;   /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* nonzero when revision data is inlined
	                        * in the index file (offsets irregular) */
} indexObject;
74 | |
75 static Py_ssize_t index_length(const indexObject *self) | |
76 { | |
77 if (self->added == NULL) | |
78 return self->length; | |
79 return self->length + PyList_GET_SIZE(self->added); | |
80 } | |
81 | |
/* Canonical tuple for the null revision, shared by all indexes. */
static PyObject *nullentry;
/* 20 zero bytes: the node id of the null revision. */
static const char nullid[20];

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

/* Py_BuildValue format for an index entry. The first field is the
 * 64-bit offset/flags word, so use 'K' (unsigned long long) when
 * long is only 32 bits wide and 'k' (unsigned long) otherwise. */
#if LONG_MAX == 0x7fffffffL
static char *tuple_format = "Kiiiiiis#";
#else
static char *tuple_format = "kiiiiiis#";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
95 | |
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * Returns NULL (with MemoryError set) if the lazy offset table cannot
 * be allocated.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		/* Inlined revlogs interleave revision data with index
		 * entries, so record offsets are irregular; build the
		 * offset table lazily on first access. */
		if (self->offsets == NULL) {
			self->offsets = PyMem_Malloc(self->raw_length *
					             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	/* Non-inlined index: fixed-size 64-byte records. */
	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}
114 | |
115 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, | |
116 int *ps, int maxrev) | |
117 { | |
118 if (rev >= self->length - 1) { | |
119 PyObject *tuple = PyList_GET_ITEM(self->added, | |
120 rev - self->length + 1); | |
121 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5)); | |
122 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6)); | |
123 } else { | |
124 const char *data = index_deref(self, rev); | |
125 ps[0] = getbe32(data + 24); | |
126 ps[1] = getbe32(data + 28); | |
127 } | |
128 /* If index file is corrupted, ps[] may point to invalid revisions. So | |
129 * there is a risk of buffer overflow to trust them unconditionally. */ | |
130 if (ps[0] > maxrev || ps[1] > maxrev) { | |
131 PyErr_SetString(PyExc_ValueError, "parent out of range"); | |
132 return -1; | |
133 } | |
134 return 0; | |
135 } | |
136 | |
137 | |
/*
 * RevlogNG format (all in big endian, data may be inlined):
 *    6 bytes: offset
 *    2 bytes: flags
 *    4 bytes: compressed length
 *    4 bytes: uncompressed length
 *    4 bytes: base revision
 *    4 bytes: link revision
 *    4 bytes: parent 1 revision
 *    4 bytes: parent 2 revision
 *   32 bytes: nodeid (only 20 bytes used)
 *
 * Decode entry `pos' into a new reference to an 8-tuple; on-disk
 * entries are memoized in self->cache.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	/* Support Python-style negative indexing. */
	if (pos < 0)
		pos += length;

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	/* The last entry is always the nullid sentinel. */
	if (pos == length - 1) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	/* Entries beyond the parsed data are stored as ready-made
	 * tuples in self->added. */
	if (pos >= self->length - 1) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
		Py_INCREF(obj);
		return obj;
	}

	/* Consult, or lazily allocate, the tuple cache for on-disk
	 * entries. */
	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
			      uncomp_len, base_rev, link_rev,
			      parent_1, parent_2, c_node_id, 20);

	if (entry) {
		/* Cached tuples live until the cache is cleared; untrack
		 * them from the GC and take the cache's own reference. */
		PyObject_GC_UnTrack(entry);
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}
223 | |
/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 *
 * Returns nullid for the sentinel position (and for INT_MAX, the tag
 * nt_insert uses for nullid), or NULL when pos is out of range or the
 * entry cannot be read.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == length - 1 || pos == INT_MAX)
		return nullid;

	if (pos >= length)
		return NULL;

	if (pos >= self->length - 1) {
		/* In-memory entry: the node is field 7 of the tuple. */
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyBytes_AS_STRING(str) : NULL;
	}

	/* On-disk entry: the node starts at byte 32 of the record. */
	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}
248 | |
249 static int nt_insert(indexObject *self, const char *node, int rev); | |
250 | |
251 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen) | |
252 { | |
253 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1) | |
254 return -1; | |
255 if (*nodelen == 20) | |
256 return 0; | |
257 PyErr_SetString(PyExc_ValueError, "20-byte hash required"); | |
258 return -1; | |
259 } | |
260 | |
/*
 * insert(index, entry): append an 8-tuple entry right before the
 * nullid sentinel (the only supported insertion point). Returns None,
 * or NULL with an exception set.
 */
static PyObject *index_insert(indexObject *self, PyObject *args)
{
	PyObject *obj;
	char *node;
	int index;
	Py_ssize_t len, nodelen;

	if (!PyArg_ParseTuple(args, "iO", &index, &obj))
		return NULL;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* Field 7 is the binary node; it must be exactly 20 bytes. */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
		return NULL;

	len = index_length(self);

	if (index < 0)
		index += len;

	/* Only appending right before the sentinel is supported. */
	if (index != len - 1) {
		PyErr_SetString(PyExc_IndexError,
				"insert only supported at index -1");
		return NULL;
	}

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	/* Keep the node->rev trie in sync if it has been built. */
	if (self->nt)
		nt_insert(self, node, index);

	/* Appending may create a new head; drop the cached head list. */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
305 | |
306 static void _index_clearcaches(indexObject *self) | |
307 { | |
308 if (self->cache) { | |
309 Py_ssize_t i; | |
310 | |
311 for (i = 0; i < self->raw_length; i++) | |
312 Py_CLEAR(self->cache[i]); | |
313 free(self->cache); | |
314 self->cache = NULL; | |
315 } | |
316 if (self->offsets) { | |
317 PyMem_Free(self->offsets); | |
318 self->offsets = NULL; | |
319 } | |
320 if (self->nt) { | |
321 free(self->nt); | |
322 self->nt = NULL; | |
323 } | |
324 Py_CLEAR(self->headrevs); | |
325 } | |
326 | |
327 static PyObject *index_clearcaches(indexObject *self) | |
328 { | |
329 _index_clearcaches(self); | |
330 self->ntlength = self->ntcapacity = 0; | |
331 self->ntdepth = self->ntsplits = 0; | |
332 self->ntrev = -1; | |
333 self->ntlookups = self->ntmisses = 0; | |
334 Py_RETURN_NONE; | |
335 } | |
336 | |
/*
 * stats(): return a dict of index and node-trie statistics, used for
 * debugging/introspection.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Store self-><__n> under key <__d>, bailing on any failure. */
#define istat(__n, __d) \
	do { \
		t = PyInt_FromSsize_t(self->__n); \
		if (!t) \
			goto bail; \
		if (PyDict_SetItemString(obj, __d, t) == -1) \
			goto bail; \
		Py_DECREF(t); \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		t = PyInt_FromSsize_t(len);
		if (!t)
			goto bail;
		if (PyDict_SetItemString(obj, "index entries added", t) == -1)
			goto bail;
		Py_DECREF(t);
	}

	/* NOTE(review): raw_length normally equals length - 1 (the
	 * sentinel excluded), so this is only reported when the two
	 * disagree -- confirm intent. */
	if (self->raw_length != self->length - 1)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntcapacity, "node trie capacity");
	istat(ntdepth, "node trie depth");
	istat(ntlength, "node trie count");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	istat(ntsplits, "node trie splits");

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(t);
	return NULL;
}
385 | |
386 /* | |
387 * When we cache a list, we want to be sure the caller can't mutate | |
388 * the cached copy. | |
389 */ | |
390 static PyObject *list_copy(PyObject *list) | |
391 { | |
392 Py_ssize_t len = PyList_GET_SIZE(list); | |
393 PyObject *newlist = PyList_New(len); | |
394 Py_ssize_t i; | |
395 | |
396 if (newlist == NULL) | |
397 return NULL; | |
398 | |
399 for (i = 0; i < len; i++) { | |
400 PyObject *obj = PyList_GET_ITEM(list, i); | |
401 Py_INCREF(obj); | |
402 PyList_SET_ITEM(newlist, i, obj); | |
403 } | |
404 | |
405 return newlist; | |
406 } | |
407 | |
408 static int check_filter(PyObject *filter, Py_ssize_t arg) { | |
409 if (filter) { | |
410 PyObject *arglist, *result; | |
411 int isfiltered; | |
412 | |
413 arglist = Py_BuildValue("(n)", arg); | |
414 if (!arglist) { | |
415 return -1; | |
416 } | |
417 | |
418 result = PyEval_CallObject(filter, arglist); | |
419 Py_DECREF(arglist); | |
420 if (!result) { | |
421 return -1; | |
422 } | |
423 | |
424 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error, | |
425 * same as this function, so we can just return it directly.*/ | |
426 isfiltered = PyObject_IsTrue(result); | |
427 Py_DECREF(result); | |
428 return isfiltered; | |
429 } else { | |
430 return 0; | |
431 } | |
432 } | |
433 | |
/*
 * Mark every rev in `list' with `marker' in the phases array and
 * return the smallest rev seen (index_length(self) + 1 when the list
 * is empty). Returns -2 on error with an exception set.
 *
 * NOTE(review): list items are trusted to be ints in [0, len) --
 * phases[] is indexed without bounds checking; confirm callers
 * sanitize the root lists.
 */
static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
				    Py_ssize_t marker, char *phases)
{
	PyObject *iter = NULL;
	PyObject *iter_item = NULL;
	Py_ssize_t min_idx = index_length(self) + 1;
	long iter_item_long;

	if (PyList_GET_SIZE(list) != 0) {
		iter = PyObject_GetIter(list);
		if (iter == NULL)
			return -2;
		while ((iter_item = PyIter_Next(iter)))
		{
			iter_item_long = PyInt_AS_LONG(iter_item);
			Py_DECREF(iter_item);
			if (iter_item_long < min_idx)
				min_idx = iter_item_long;
			phases[iter_item_long] = marker;
		}
		Py_DECREF(iter);
	}

	return min_idx;
}
459 | |
460 static inline void set_phase_from_parents(char *phases, int parent_1, | |
461 int parent_2, Py_ssize_t i) | |
462 { | |
463 if (parent_1 >= 0 && phases[parent_1] > phases[i]) | |
464 phases[i] = phases[parent_1]; | |
465 if (parent_2 >= 0 && phases[parent_2] > phases[i]) | |
466 phases[i] = phases[parent_2]; | |
467 } | |
468 | |
/*
 * reachableroots2(minroot, heads, roots, includepath):
 *
 * Return the subset of `roots' reachable from `heads' by walking
 * parents, never descending below `minroot'. With includepath, also
 * return every rev lying on a path between a reachable root and a
 * head.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self) - 1;
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
	 * revstates: array of length len+1 (all revs + nullrev)
	 * Both are indexed by rev+1 so that nullrev (-1) maps to slot 0. */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
			      &PyList_Type, &roots,
			      &PyBool_Type, &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark every requested root. */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* Without includepath the walk stops at roots;
			 * with it we keep descending so the path-marking
			 * sweep below can see everything underneath. */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == -1)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN)
			    && parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* Forward sweep: a seen rev with a reachable parent is
		 * itself on a root-to-head path. */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) & RS_REACHABLE)
			    && !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromLong(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
624 | |
/*
 * computephasesmapsets(roots): given the per-phase lists of phase
 * roots, return (phaseslist, phasessetlist) where phaseslist maps
 * every rev to its phase number and phasessetlist holds, for each
 * non-public phase, the set of revs in that phase.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phaseslist = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self) - 1;
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	/* NOTE(review): this path returns NULL without setting an
	 * exception when roots is not a list -- confirm callers cope. */
	if (roots == NULL || !PyList_Check(roots))
		goto done;

	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots)+1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	/* Slot 0 (public) has no set; public is computed by difference. */
	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase-1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		/* phasessetlist owns phaseset from here on (SET_ITEM
		 * steals the reference). */
		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
		if (!PyList_Check(phaseroots))
			goto release;
		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents,
					      (int)len - 1) < 0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1], i);
		}
	}
	/* Transform phase list to a python list */
	phaseslist = PyList_New(len);
	if (phaseslist == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		PyObject *phaseval;

		phase = phases[i];
		/* We only store the sets of phase for non public phase, the public phase
		 * is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromLong(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
		phaseval = PyInt_FromLong(phase);
		if (phaseval == NULL)
			goto release;
		PyList_SET_ITEM(phaseslist, i, phaseval);
	}
	ret = PyTuple_Pack(2, phaseslist, phasessetlist);

release:
	/* On success PyTuple_Pack took its own references; drop ours.
	 * On failure this is the cleanup path. */
	Py_XDECREF(phaseslist);
	Py_XDECREF(phasessetlist);
done:
	free(phases);
	return ret;
}
717 | |
/*
 * headrevs([filteredrevs]): compute the list of head revisions,
 * skipping revs in `filteredrevs' (any object with __contains__).
 * The result is cached on the index (invalidated when entries are
 * added); a copy of the cached list is returned so callers cannot
 * mutate the cache.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* Cache hit requires the exact same filter object. */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(PyExc_TypeError,
				"filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self) - 1;
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	/* Empty repo: the only head is the null revision (-1). */
	if (len == 0) {
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	/* A rev is a head unless it is the parent of an unfiltered rev. */
	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child of this
		 * node already, and therefore this node is not filtered. So we can skip
		 * the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
					"unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* Collect every rev not marked as a parent. */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	/* The cache takes ownership of heads; hand back a copy. */
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
818 | |
819 static inline int nt_level(const char *node, Py_ssize_t level) | |
820 { | |
821 int v = node[level>>1]; | |
822 if (!(level & 1)) | |
823 v >>= 4; | |
824 return v & 0xf; | |
825 } | |
826 | |
/*
 * Return values:
 *
 *   -4: match is ambiguous (multiple candidates)
 *   -2: not found
 *   -1: the node is nullid
 * rest: valid rev
 */
static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
		   int hex)
{
	/* Select the nybble accessor: hex string vs. binary node. */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* Cheap first-byte test short-circuits the full nullid memcmp
	 * for almost every node. */
	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (self->nt == NULL)
		return -2;

	/* One trie level per nybble, capped at 40 (20 bytes). */
	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetree *n = &self->nt[off];
		int v = n->children[k];

		if (v < 0) {
			/* Leaf: verify the remaining nybbles against the
			 * stored node. (This inner `n' deliberately
			 * shadows the nodetree pointer above.) */
			const char *n;
			Py_ssize_t i;

			v = -(v + 1);
			n = index_node(self, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
876 | |
877 static int nt_new(indexObject *self) | |
878 { | |
879 if (self->ntlength == self->ntcapacity) { | |
880 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) { | |
881 PyErr_SetString(PyExc_MemoryError, | |
882 "overflow in nt_new"); | |
883 return -1; | |
884 } | |
885 self->ntcapacity *= 2; | |
886 self->nt = realloc(self->nt, | |
887 self->ntcapacity * sizeof(nodetree)); | |
888 if (self->nt == NULL) { | |
889 PyErr_SetString(PyExc_MemoryError, "out of memory"); | |
890 return -1; | |
891 } | |
892 memset(&self->nt[self->ntlength], 0, | |
893 sizeof(nodetree) * (self->ntcapacity - self->ntlength)); | |
894 } | |
895 return self->ntlength++; | |
896 } | |
897 | |
/*
 * Insert a node->rev mapping into the trie, walking one nybble per
 * level and splitting leaves into interior nodes as needed. Returns
 * 0 on success, -1 on allocation failure (exception set by nt_new).
 */
static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		if (v == 0) {
			/* Empty slot: store rev as a leaf (-rev - 1). */
			n->children[k] = -rev - 1;
			return 0;
		}
		if (v < 0) {
			/* Slot holds a leaf. If it is the same node (or
			 * unreadable), overwrite; otherwise split by
			 * pushing the old leaf one level down. */
			const char *oldnode = index_node(self, -(v + 1));
			int noff;

			if (!oldnode || !memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 1;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			/* Interior node: descend. */
			level += 1;
			off = v;
		}
	}

	/* All 40 nybbles consumed without placing the leaf. */
	return -1;
}
942 | |
/*
 * Lazily allocate the node trie and seed it with nullid. Returns 0 on
 * success (including when the trie already exists), -1 with an
 * exception set on error.
 */
static int nt_init(indexObject *self)
{
	if (self->nt == NULL) {
		if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
			PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
			return -1;
		}
		/* Start with roughly one trie node per two revisions,
		 * but at least 4. */
		self->ntcapacity = self->raw_length < 4
			? 4 : (int)self->raw_length / 2;

		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
		if (self->nt == NULL) {
			PyErr_NoMemory();
			return -1;
		}
		self->ntlength = 1;
		/* Lookups will lazily populate the trie by scanning
		 * backwards from the end of the index. */
		self->ntrev = (int)index_length(self) - 1;
		self->ntlookups = 1;
		self->ntmisses = 0;
		/* nullid is tagged with rev INT_MAX; see index_node. */
		if (nt_insert(self, nullid, INT_MAX) == -1)
			return -1;
	}
	return 0;
}
967 | |
/*
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self,
			   const char *node, Py_ssize_t nodelen)
{
	int rev;

	self->ntlookups++;
	/* Fast path: the node may already be cached in the trie. */
	rev = nt_find(self, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	if (nt_init(self) == -1)
		return -3;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL)
				return -2;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(self, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL) {
				/* Remember how far the bulk scan got so a
				 * later lookup resumes from there. */
				self->ntrev = rev + 1;
				return -2;
			}
			if (nt_insert(self, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1030 | |
1031 static void raise_revlog_error(void) | |
1032 { | |
1033 PyObject *mod = NULL, *dict = NULL, *errclass = NULL; | |
1034 | |
1035 mod = PyImport_ImportModule("mercurial.error"); | |
1036 if (mod == NULL) { | |
1037 goto cleanup; | |
1038 } | |
1039 | |
1040 dict = PyModule_GetDict(mod); | |
1041 if (dict == NULL) { | |
1042 goto cleanup; | |
1043 } | |
1044 Py_INCREF(dict); | |
1045 | |
1046 errclass = PyDict_GetItemString(dict, "RevlogError"); | |
1047 if (errclass == NULL) { | |
1048 PyErr_SetString(PyExc_SystemError, | |
1049 "could not find RevlogError"); | |
1050 goto cleanup; | |
1051 } | |
1052 | |
1053 /* value of exception is ignored by callers */ | |
1054 PyErr_SetString(errclass, "RevlogError"); | |
1055 | |
1056 cleanup: | |
1057 Py_XDECREF(dict); | |
1058 Py_XDECREF(mod); | |
1059 } | |
1060 | |
1061 static PyObject *index_getitem(indexObject *self, PyObject *value) | |
1062 { | |
1063 char *node; | |
1064 Py_ssize_t nodelen; | |
1065 int rev; | |
1066 | |
1067 if (PyInt_Check(value)) | |
1068 return index_get(self, PyInt_AS_LONG(value)); | |
1069 | |
1070 if (node_check(value, &node, &nodelen) == -1) | |
1071 return NULL; | |
1072 rev = index_find_node(self, node, nodelen); | |
1073 if (rev >= -1) | |
1074 return PyInt_FromLong(rev); | |
1075 if (rev == -2) | |
1076 raise_revlog_error(); | |
1077 return NULL; | |
1078 } | |
1079 | |
/*
 * Fully populate the radix tree, then look up a (hex) node prefix.
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: an index entry could not be read
 * else: the result of nt_find() in hex mode, returned unchanged
 */
static int nt_partialmatch(indexObject *self, const char *node,
			   Py_ssize_t nodelen)
{
	int rev;

	if (nt_init(self) == -1)
		return -3;

	if (self->ntrev > 0) {
		/* ensure that the radix tree is fully populated */
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL)
				return -2;
			if (nt_insert(self, n, rev) == -1)
				return -3;
		}
		/* rev is -1 here: everything is now cached */
		self->ntrev = rev;
	}

	return nt_find(self, node, nodelen, 1);
}
1102 | |
/*
 * Resolve a hex node prefix (4 to 40 characters) to a full 20-byte
 * binary node.  Returns None for non-hex input or no match, the nullid
 * for a match on the null rev, or NULL with an exception set on error.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
		return NULL;

	if (nodelen < 4) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets a Python exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	rev = nt_partialmatch(self, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		/* fall through */
	case -3:
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node(self, rev);
	if (fullnode == NULL) {
		PyErr_Format(PyExc_IndexError,
			     "could not access rev %d", rev);
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}
1152 | |
1153 static PyObject *index_m_get(indexObject *self, PyObject *args) | |
1154 { | |
1155 Py_ssize_t nodelen; | |
1156 PyObject *val; | |
1157 char *node; | |
1158 int rev; | |
1159 | |
1160 if (!PyArg_ParseTuple(args, "O", &val)) | |
1161 return NULL; | |
1162 if (node_check(val, &node, &nodelen) == -1) | |
1163 return NULL; | |
1164 rev = index_find_node(self, node, nodelen); | |
1165 if (rev == -3) | |
1166 return NULL; | |
1167 if (rev == -2) | |
1168 Py_RETURN_NONE; | |
1169 return PyInt_FromLong(rev); | |
1170 } | |
1171 | |
1172 static int index_contains(indexObject *self, PyObject *value) | |
1173 { | |
1174 char *node; | |
1175 Py_ssize_t nodelen; | |
1176 | |
1177 if (PyInt_Check(value)) { | |
1178 long rev = PyInt_AS_LONG(value); | |
1179 return rev >= -1 && rev < index_length(self); | |
1180 } | |
1181 | |
1182 if (node_check(value, &node, &nodelen) == -1) | |
1183 return -1; | |
1184 | |
1185 switch (index_find_node(self, node, nodelen)) { | |
1186 case -3: | |
1187 return -1; | |
1188 case -2: | |
1189 return 0; | |
1190 default: | |
1191 return 1; | |
1192 } | |
1193 } | |
1194 | |
1195 typedef uint64_t bitmask; | |
1196 | |
1197 /* | |
1198 * Given a disjoint set of revs, return all candidates for the | |
1199 * greatest common ancestor. In revset notation, this is the set | |
1200 * "heads(::a and ::b and ...)" | |
1201 */ | |
1202 static PyObject *find_gca_candidates(indexObject *self, const int *revs, | |
1203 int revcount) | |
1204 { | |
1205 const bitmask allseen = (1ull << revcount) - 1; | |
1206 const bitmask poison = 1ull << revcount; | |
1207 PyObject *gca = PyList_New(0); | |
1208 int i, v, interesting; | |
1209 int maxrev = -1; | |
1210 bitmask sp; | |
1211 bitmask *seen; | |
1212 | |
1213 if (gca == NULL) | |
1214 return PyErr_NoMemory(); | |
1215 | |
1216 for (i = 0; i < revcount; i++) { | |
1217 if (revs[i] > maxrev) | |
1218 maxrev = revs[i]; | |
1219 } | |
1220 | |
1221 seen = calloc(sizeof(*seen), maxrev + 1); | |
1222 if (seen == NULL) { | |
1223 Py_DECREF(gca); | |
1224 return PyErr_NoMemory(); | |
1225 } | |
1226 | |
1227 for (i = 0; i < revcount; i++) | |
1228 seen[revs[i]] = 1ull << i; | |
1229 | |
1230 interesting = revcount; | |
1231 | |
1232 for (v = maxrev; v >= 0 && interesting; v--) { | |
1233 bitmask sv = seen[v]; | |
1234 int parents[2]; | |
1235 | |
1236 if (!sv) | |
1237 continue; | |
1238 | |
1239 if (sv < poison) { | |
1240 interesting -= 1; | |
1241 if (sv == allseen) { | |
1242 PyObject *obj = PyInt_FromLong(v); | |
1243 if (obj == NULL) | |
1244 goto bail; | |
1245 if (PyList_Append(gca, obj) == -1) { | |
1246 Py_DECREF(obj); | |
1247 goto bail; | |
1248 } | |
1249 sv |= poison; | |
1250 for (i = 0; i < revcount; i++) { | |
1251 if (revs[i] == v) | |
1252 goto done; | |
1253 } | |
1254 } | |
1255 } | |
1256 if (index_get_parents(self, v, parents, maxrev) < 0) | |
1257 goto bail; | |
1258 | |
1259 for (i = 0; i < 2; i++) { | |
1260 int p = parents[i]; | |
1261 if (p == -1) | |
1262 continue; | |
1263 sp = seen[p]; | |
1264 if (sv < poison) { | |
1265 if (sp == 0) { | |
1266 seen[p] = sv; | |
1267 interesting++; | |
1268 } | |
1269 else if (sp != sv) | |
1270 seen[p] |= sv; | |
1271 } else { | |
1272 if (sp && sp < poison) | |
1273 interesting--; | |
1274 seen[p] = sv; | |
1275 } | |
1276 } | |
1277 } | |
1278 | |
1279 done: | |
1280 free(seen); | |
1281 return gca; | |
1282 bail: | |
1283 free(seen); | |
1284 Py_XDECREF(gca); | |
1285 return NULL; | |
1286 } | |
1287 | |
1288 /* | |
1289 * Given a disjoint set of revs, return the subset with the longest | |
1290 * path to the root. | |
1291 */ | |
1292 static PyObject *find_deepest(indexObject *self, PyObject *revs) | |
1293 { | |
1294 const Py_ssize_t revcount = PyList_GET_SIZE(revs); | |
1295 static const Py_ssize_t capacity = 24; | |
1296 int *depth, *interesting = NULL; | |
1297 int i, j, v, ninteresting; | |
1298 PyObject *dict = NULL, *keys = NULL; | |
1299 long *seen = NULL; | |
1300 int maxrev = -1; | |
1301 long final; | |
1302 | |
1303 if (revcount > capacity) { | |
1304 PyErr_Format(PyExc_OverflowError, | |
1305 "bitset size (%ld) > capacity (%ld)", | |
1306 (long)revcount, (long)capacity); | |
1307 return NULL; | |
1308 } | |
1309 | |
1310 for (i = 0; i < revcount; i++) { | |
1311 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); | |
1312 if (n > maxrev) | |
1313 maxrev = n; | |
1314 } | |
1315 | |
1316 depth = calloc(sizeof(*depth), maxrev + 1); | |
1317 if (depth == NULL) | |
1318 return PyErr_NoMemory(); | |
1319 | |
1320 seen = calloc(sizeof(*seen), maxrev + 1); | |
1321 if (seen == NULL) { | |
1322 PyErr_NoMemory(); | |
1323 goto bail; | |
1324 } | |
1325 | |
1326 interesting = calloc(sizeof(*interesting), 2 << revcount); | |
1327 if (interesting == NULL) { | |
1328 PyErr_NoMemory(); | |
1329 goto bail; | |
1330 } | |
1331 | |
1332 if (PyList_Sort(revs) == -1) | |
1333 goto bail; | |
1334 | |
1335 for (i = 0; i < revcount; i++) { | |
1336 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); | |
1337 long b = 1l << i; | |
1338 depth[n] = 1; | |
1339 seen[n] = b; | |
1340 interesting[b] = 1; | |
1341 } | |
1342 | |
1343 ninteresting = (int)revcount; | |
1344 | |
1345 for (v = maxrev; v >= 0 && ninteresting > 1; v--) { | |
1346 int dv = depth[v]; | |
1347 int parents[2]; | |
1348 long sv; | |
1349 | |
1350 if (dv == 0) | |
1351 continue; | |
1352 | |
1353 sv = seen[v]; | |
1354 if (index_get_parents(self, v, parents, maxrev) < 0) | |
1355 goto bail; | |
1356 | |
1357 for (i = 0; i < 2; i++) { | |
1358 int p = parents[i]; | |
1359 long sp; | |
1360 int dp; | |
1361 | |
1362 if (p == -1) | |
1363 continue; | |
1364 | |
1365 dp = depth[p]; | |
1366 sp = seen[p]; | |
1367 if (dp <= dv) { | |
1368 depth[p] = dv + 1; | |
1369 if (sp != sv) { | |
1370 interesting[sv] += 1; | |
1371 seen[p] = sv; | |
1372 if (sp) { | |
1373 interesting[sp] -= 1; | |
1374 if (interesting[sp] == 0) | |
1375 ninteresting -= 1; | |
1376 } | |
1377 } | |
1378 } | |
1379 else if (dv == dp - 1) { | |
1380 long nsp = sp | sv; | |
1381 if (nsp == sp) | |
1382 continue; | |
1383 seen[p] = nsp; | |
1384 interesting[sp] -= 1; | |
1385 if (interesting[sp] == 0 && interesting[nsp] > 0) | |
1386 ninteresting -= 1; | |
1387 interesting[nsp] += 1; | |
1388 } | |
1389 } | |
1390 interesting[sv] -= 1; | |
1391 if (interesting[sv] == 0) | |
1392 ninteresting -= 1; | |
1393 } | |
1394 | |
1395 final = 0; | |
1396 j = ninteresting; | |
1397 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) { | |
1398 if (interesting[i] == 0) | |
1399 continue; | |
1400 final |= i; | |
1401 j -= 1; | |
1402 } | |
1403 if (final == 0) { | |
1404 keys = PyList_New(0); | |
1405 goto bail; | |
1406 } | |
1407 | |
1408 dict = PyDict_New(); | |
1409 if (dict == NULL) | |
1410 goto bail; | |
1411 | |
1412 for (i = 0; i < revcount; i++) { | |
1413 PyObject *key; | |
1414 | |
1415 if ((final & (1 << i)) == 0) | |
1416 continue; | |
1417 | |
1418 key = PyList_GET_ITEM(revs, i); | |
1419 Py_INCREF(key); | |
1420 Py_INCREF(Py_None); | |
1421 if (PyDict_SetItem(dict, key, Py_None) == -1) { | |
1422 Py_DECREF(key); | |
1423 Py_DECREF(Py_None); | |
1424 goto bail; | |
1425 } | |
1426 } | |
1427 | |
1428 keys = PyDict_Keys(dict); | |
1429 | |
1430 bail: | |
1431 free(depth); | |
1432 free(seen); | |
1433 free(interesting); | |
1434 Py_XDECREF(dict); | |
1435 | |
1436 return keys; | |
1437 } | |
1438 | |
1439 /* | |
1440 * Given a (possibly overlapping) set of revs, return all the | |
1441 * common ancestors heads: heads(::args[0] and ::a[1] and ...) | |
1442 */ | |
1443 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args) | |
1444 { | |
1445 PyObject *ret = NULL; | |
1446 Py_ssize_t argcount, i, len; | |
1447 bitmask repeat = 0; | |
1448 int revcount = 0; | |
1449 int *revs; | |
1450 | |
1451 argcount = PySequence_Length(args); | |
1452 revs = PyMem_Malloc(argcount * sizeof(*revs)); | |
1453 if (argcount > 0 && revs == NULL) | |
1454 return PyErr_NoMemory(); | |
1455 len = index_length(self) - 1; | |
1456 | |
1457 for (i = 0; i < argcount; i++) { | |
1458 static const int capacity = 24; | |
1459 PyObject *obj = PySequence_GetItem(args, i); | |
1460 bitmask x; | |
1461 long val; | |
1462 | |
1463 if (!PyInt_Check(obj)) { | |
1464 PyErr_SetString(PyExc_TypeError, | |
1465 "arguments must all be ints"); | |
1466 Py_DECREF(obj); | |
1467 goto bail; | |
1468 } | |
1469 val = PyInt_AsLong(obj); | |
1470 Py_DECREF(obj); | |
1471 if (val == -1) { | |
1472 ret = PyList_New(0); | |
1473 goto done; | |
1474 } | |
1475 if (val < 0 || val >= len) { | |
1476 PyErr_SetString(PyExc_IndexError, | |
1477 "index out of range"); | |
1478 goto bail; | |
1479 } | |
1480 /* this cheesy bloom filter lets us avoid some more | |
1481 * expensive duplicate checks in the common set-is-disjoint | |
1482 * case */ | |
1483 x = 1ull << (val & 0x3f); | |
1484 if (repeat & x) { | |
1485 int k; | |
1486 for (k = 0; k < revcount; k++) { | |
1487 if (val == revs[k]) | |
1488 goto duplicate; | |
1489 } | |
1490 } | |
1491 else repeat |= x; | |
1492 if (revcount >= capacity) { | |
1493 PyErr_Format(PyExc_OverflowError, | |
1494 "bitset size (%d) > capacity (%d)", | |
1495 revcount, capacity); | |
1496 goto bail; | |
1497 } | |
1498 revs[revcount++] = (int)val; | |
1499 duplicate:; | |
1500 } | |
1501 | |
1502 if (revcount == 0) { | |
1503 ret = PyList_New(0); | |
1504 goto done; | |
1505 } | |
1506 if (revcount == 1) { | |
1507 PyObject *obj; | |
1508 ret = PyList_New(1); | |
1509 if (ret == NULL) | |
1510 goto bail; | |
1511 obj = PyInt_FromLong(revs[0]); | |
1512 if (obj == NULL) | |
1513 goto bail; | |
1514 PyList_SET_ITEM(ret, 0, obj); | |
1515 goto done; | |
1516 } | |
1517 | |
1518 ret = find_gca_candidates(self, revs, revcount); | |
1519 if (ret == NULL) | |
1520 goto bail; | |
1521 | |
1522 done: | |
1523 PyMem_Free(revs); | |
1524 return ret; | |
1525 | |
1526 bail: | |
1527 PyMem_Free(revs); | |
1528 Py_XDECREF(ret); | |
1529 return NULL; | |
1530 } | |
1531 | |
1532 /* | |
1533 * Given a (possibly overlapping) set of revs, return the greatest | |
1534 * common ancestors: those with the longest path to the root. | |
1535 */ | |
1536 static PyObject *index_ancestors(indexObject *self, PyObject *args) | |
1537 { | |
1538 PyObject *ret; | |
1539 PyObject *gca = index_commonancestorsheads(self, args); | |
1540 if (gca == NULL) | |
1541 return NULL; | |
1542 | |
1543 if (PyList_GET_SIZE(gca) <= 1) { | |
1544 return gca; | |
1545 } | |
1546 | |
1547 ret = find_deepest(self, gca); | |
1548 Py_DECREF(gca); | |
1549 return ret; | |
1550 } | |
1551 | |
1552 /* | |
1553 * Invalidate any trie entries introduced by added revs. | |
1554 */ | |
1555 static void nt_invalidate_added(indexObject *self, Py_ssize_t start) | |
1556 { | |
1557 Py_ssize_t i, len = PyList_GET_SIZE(self->added); | |
1558 | |
1559 for (i = start; i < len; i++) { | |
1560 PyObject *tuple = PyList_GET_ITEM(self->added, i); | |
1561 PyObject *node = PyTuple_GET_ITEM(tuple, 7); | |
1562 | |
1563 nt_insert(self, PyBytes_AS_STRING(node), -1); | |
1564 } | |
1565 | |
1566 if (start == 0) | |
1567 Py_CLEAR(self->added); | |
1568 } | |
1569 | |
1570 /* | |
1571 * Delete a numeric range of revs, which must be at the end of the | |
1572 * range, but exclude the sentinel nullid entry. | |
1573 */ | |
1574 static int index_slice_del(indexObject *self, PyObject *item) | |
1575 { | |
1576 Py_ssize_t start, stop, step, slicelength; | |
1577 Py_ssize_t length = index_length(self); | |
1578 int ret = 0; | |
1579 | |
1580 /* Argument changed from PySliceObject* to PyObject* in Python 3. */ | |
1581 #ifdef IS_PY3K | |
1582 if (PySlice_GetIndicesEx(item, length, | |
1583 #else | |
1584 if (PySlice_GetIndicesEx((PySliceObject*)item, length, | |
1585 #endif | |
1586 &start, &stop, &step, &slicelength) < 0) | |
1587 return -1; | |
1588 | |
1589 if (slicelength <= 0) | |
1590 return 0; | |
1591 | |
1592 if ((step < 0 && start < stop) || (step > 0 && start > stop)) | |
1593 stop = start; | |
1594 | |
1595 if (step < 0) { | |
1596 stop = start + 1; | |
1597 start = stop + step*(slicelength - 1) - 1; | |
1598 step = -step; | |
1599 } | |
1600 | |
1601 if (step != 1) { | |
1602 PyErr_SetString(PyExc_ValueError, | |
1603 "revlog index delete requires step size of 1"); | |
1604 return -1; | |
1605 } | |
1606 | |
1607 if (stop != length - 1) { | |
1608 PyErr_SetString(PyExc_IndexError, | |
1609 "revlog index deletion indices are invalid"); | |
1610 return -1; | |
1611 } | |
1612 | |
1613 if (start < self->length - 1) { | |
1614 if (self->nt) { | |
1615 Py_ssize_t i; | |
1616 | |
1617 for (i = start + 1; i < self->length - 1; i++) { | |
1618 const char *node = index_node(self, i); | |
1619 | |
1620 if (node) | |
1621 nt_insert(self, node, -1); | |
1622 } | |
1623 if (self->added) | |
1624 nt_invalidate_added(self, 0); | |
1625 if (self->ntrev > start) | |
1626 self->ntrev = (int)start; | |
1627 } | |
1628 self->length = start + 1; | |
1629 if (start < self->raw_length) { | |
1630 if (self->cache) { | |
1631 Py_ssize_t i; | |
1632 for (i = start; i < self->raw_length; i++) | |
1633 Py_CLEAR(self->cache[i]); | |
1634 } | |
1635 self->raw_length = start; | |
1636 } | |
1637 goto done; | |
1638 } | |
1639 | |
1640 if (self->nt) { | |
1641 nt_invalidate_added(self, start - self->length + 1); | |
1642 if (self->ntrev > start) | |
1643 self->ntrev = (int)start; | |
1644 } | |
1645 if (self->added) | |
1646 ret = PyList_SetSlice(self->added, start - self->length + 1, | |
1647 PyList_GET_SIZE(self->added), NULL); | |
1648 done: | |
1649 Py_CLEAR(self->headrevs); | |
1650 return ret; | |
1651 } | |
1652 | |
1653 /* | |
1654 * Supported ops: | |
1655 * | |
1656 * slice deletion | |
1657 * string assignment (extend node->rev mapping) | |
1658 * string deletion (shrink node->rev mapping) | |
1659 */ | |
1660 static int index_assign_subscript(indexObject *self, PyObject *item, | |
1661 PyObject *value) | |
1662 { | |
1663 char *node; | |
1664 Py_ssize_t nodelen; | |
1665 long rev; | |
1666 | |
1667 if (PySlice_Check(item) && value == NULL) | |
1668 return index_slice_del(self, item); | |
1669 | |
1670 if (node_check(item, &node, &nodelen) == -1) | |
1671 return -1; | |
1672 | |
1673 if (value == NULL) | |
1674 return self->nt ? nt_insert(self, node, -1) : 0; | |
1675 rev = PyInt_AsLong(value); | |
1676 if (rev > INT_MAX || rev < 0) { | |
1677 if (!PyErr_Occurred()) | |
1678 PyErr_SetString(PyExc_ValueError, "rev out of range"); | |
1679 return -1; | |
1680 } | |
1681 | |
1682 if (nt_init(self) == -1) | |
1683 return -1; | |
1684 return nt_insert(self, node, (int)rev); | |
1685 } | |
1686 | |
1687 /* | |
1688 * Find all RevlogNG entries in an index that has inline data. Update | |
1689 * the optional "offsets" table with those entries. | |
1690 */ | |
1691 static Py_ssize_t inline_scan(indexObject *self, const char **offsets) | |
1692 { | |
1693 const char *data = (const char *)self->buf.buf; | |
1694 Py_ssize_t pos = 0; | |
1695 Py_ssize_t end = self->buf.len; | |
1696 long incr = v1_hdrsize; | |
1697 Py_ssize_t len = 0; | |
1698 | |
1699 while (pos + v1_hdrsize <= end && pos >= 0) { | |
1700 uint32_t comp_len; | |
1701 /* 3rd element of header is length of compressed inline data */ | |
1702 comp_len = getbe32(data + pos + 8); | |
1703 incr = v1_hdrsize + comp_len; | |
1704 if (offsets) | |
1705 offsets[len] = data + pos; | |
1706 len++; | |
1707 pos += incr; | |
1708 } | |
1709 | |
1710 if (pos != end) { | |
1711 if (!PyErr_Occurred()) | |
1712 PyErr_SetString(PyExc_ValueError, "corrupt index file"); | |
1713 return -1; | |
1714 } | |
1715 | |
1716 return len; | |
1717 } | |
1718 | |
/*
 * Initialize an index object from (data, inlined) args.  data must
 * support the buffer interface; inlined is a boolean.  Returns 0 on
 * success, -1 with an exception set on failure.
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash. */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->nt = NULL;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
				"data does not support buffer interface");
		return -1;
	}

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	/* node trie statistics; ntrev == -1 means nothing cached yet */
	self->ntlength = self->ntcapacity = 0;
	self->ntdepth = self->ntsplits = 0;
	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* inline entries have irregular sizes; count by scanning */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len + 1;	/* +1 for the nullid sentinel */
	} else {
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length + 1;
	}

	return 0;
bail:
	return -1;
}
1776 | |
1777 static PyObject *index_nodemap(indexObject *self) | |
1778 { | |
1779 Py_INCREF(self); | |
1780 return (PyObject *)self; | |
1781 } | |
1782 | |
static void index_dealloc(indexObject *self)
{
	/* drop the radix tree, entry cache and other derived state first */
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		/* release the buffer acquired in index_init() */
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
1795 | |
/* Sequence protocol: len(index), index[rev] and "node in index". */
static PySequenceMethods index_sequence_methods = {
	(lenfunc)index_length,	   /* sq_length */
	0,			   /* sq_concat */
	0,			   /* sq_repeat */
	(ssizeargfunc)index_get,   /* sq_item */
	0,			   /* sq_slice */
	0,			   /* sq_ass_item */
	0,			   /* sq_ass_slice */
	(objobjproc)index_contains, /* sq_contains */
};
1806 | |
/* Mapping protocol: node lookup, node assignment and slice deletion. */
static PyMappingMethods index_mapping_methods = {
	(lenfunc)index_length,		 /* mp_length */
	(binaryfunc)index_getitem,	 /* mp_subscript */
	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
1812 | |
/* Methods exposed on index objects; see the per-entry docstrings. */
static PyMethodDef index_methods[] = {
	{"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
	 "return the gca set of the given revs"},
	{"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
	  METH_VARARGS,
	  "return the heads of the common ancestors of the given revs"},
	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
	 "clear the index caches"},
	{"get", (PyCFunction)index_m_get, METH_VARARGS,
	 "get an index entry"},
	{"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
			METH_VARARGS, "compute phases"},
	{"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
		"reachableroots"},
	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
	 "get head revisions"}, /* Can do filtering since 3.2 */
	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
	 "get filtered head revisions"}, /* Can always do filtering */
	{"insert", (PyCFunction)index_insert, METH_VARARGS,
	 "insert an index entry"},
	{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
	 "match a potentially ambiguous node ID"},
	{"stats", (PyCFunction)index_stats, METH_NOARGS,
	 "stats for the index"},
	{NULL} /* Sentinel */
};
1839 | |
/* Attributes: "nodemap" returns the index itself (see index_nodemap). */
static PyGetSetDef index_getset[] = {
	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
	{NULL} /* Sentinel */
};
1844 | |
/* The revlog index type; registered on the module in revlog_module_init(). */
static PyTypeObject indexType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"parsers.index",           /* tp_name */
	sizeof(indexObject),       /* tp_basicsize */
	0,                         /* tp_itemsize */
	(destructor)index_dealloc, /* tp_dealloc */
	0,                         /* tp_print */
	0,                         /* tp_getattr */
	0,                         /* tp_setattr */
	0,                         /* tp_compare */
	0,                         /* tp_repr */
	0,                         /* tp_as_number */
	&index_sequence_methods,   /* tp_as_sequence */
	&index_mapping_methods,    /* tp_as_mapping */
	0,                         /* tp_hash */
	0,                         /* tp_call */
	0,                         /* tp_str */
	0,                         /* tp_getattro */
	0,                         /* tp_setattro */
	0,                         /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,        /* tp_flags */
	"revlog index",            /* tp_doc */
	0,                         /* tp_traverse */
	0,                         /* tp_clear */
	0,                         /* tp_richcompare */
	0,                         /* tp_weaklistoffset */
	0,                         /* tp_iter */
	0,                         /* tp_iternext */
	index_methods,             /* tp_methods */
	0,                         /* tp_members */
	index_getset,              /* tp_getset */
	0,                         /* tp_base */
	0,                         /* tp_dict */
	0,                         /* tp_descr_get */
	0,                         /* tp_descr_set */
	0,                         /* tp_dictoffset */
	(initproc)index_init,      /* tp_init */
	0,                         /* tp_alloc */
};
1884 | |
1885 /* | |
1886 * returns a tuple of the form (index, index, cache) with elements as | |
1887 * follows: | |
1888 * | |
1889 * index: an index object that lazily parses RevlogNG records | |
1890 * cache: if data is inlined, a tuple (0, index_file_content), else None | |
1891 * index_file_content could be a string, or a buffer | |
1892 * | |
1893 * added complications are for backwards compatibility | |
1894 */ | |
1895 PyObject *parse_index2(PyObject *self, PyObject *args) | |
1896 { | |
1897 PyObject *tuple = NULL, *cache = NULL; | |
1898 indexObject *idx; | |
1899 int ret; | |
1900 | |
1901 idx = PyObject_New(indexObject, &indexType); | |
1902 if (idx == NULL) | |
1903 goto bail; | |
1904 | |
1905 ret = index_init(idx, args); | |
1906 if (ret == -1) | |
1907 goto bail; | |
1908 | |
1909 if (idx->inlined) { | |
1910 cache = Py_BuildValue("iO", 0, idx->data); | |
1911 if (cache == NULL) | |
1912 goto bail; | |
1913 } else { | |
1914 cache = Py_None; | |
1915 Py_INCREF(cache); | |
1916 } | |
1917 | |
1918 tuple = Py_BuildValue("NN", idx, cache); | |
1919 if (!tuple) | |
1920 goto bail; | |
1921 return tuple; | |
1922 | |
1923 bail: | |
1924 Py_XDECREF(idx); | |
1925 Py_XDECREF(cache); | |
1926 Py_XDECREF(tuple); | |
1927 return NULL; | |
1928 } | |
1929 | |
1930 void revlog_module_init(PyObject *mod) | |
1931 { | |
1932 indexType.tp_new = PyType_GenericNew; | |
1933 if (PyType_Ready(&indexType) < 0 || | |
1934 PyType_Ready(&dirstateTupleType) < 0) | |
1935 return; | |
1936 Py_INCREF(&indexType); | |
1937 PyModule_AddObject(mod, "index", (PyObject *)&indexType); | |
1938 | |
1939 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0, | |
1940 -1, -1, -1, -1, nullid, 20); | |
1941 if (nullentry) | |
1942 PyObject_GC_UnTrack(nullentry); | |
1943 } |