comparison mercurial/cext/manifest.c @ 47043:12450fbea288

manifests: push down expected node length into the parser. This strictly enforces the node length in the manifest lines according to what the repository expects. One test case moves large hash testing into the non-treemanifest part, as treemanifests don't provide an interface for overriding just the node length for now. Differential Revision: https://phab.mercurial-scm.org/D10533
author Joerg Sonnenberger <joerg@bec.de>
date Fri, 30 Apr 2021 02:11:58 +0200
parents 9719e118e4af
children be3af7eb2bbb
comparison
equal deleted inserted replaced
47042:c5e1cc0b4c77 47043:12450fbea288
26 } line; 26 } line;
27 27
28 typedef struct { 28 typedef struct {
29 PyObject_HEAD 29 PyObject_HEAD
30 PyObject *pydata; 30 PyObject *pydata;
31 Py_ssize_t nodelen;
31 line *lines; 32 line *lines;
32 int numlines; /* number of line entries */ 33 int numlines; /* number of line entries */
33 int livelines; /* number of non-deleted lines */ 34 int livelines; /* number of non-deleted lines */
34 int maxlines; /* allocated number of lines */ 35 int maxlines; /* allocated number of lines */
35 bool dirty; 36 bool dirty;
47 const char *end = memchr(l->start, '\0', l->len); 48 const char *end = memchr(l->start, '\0', l->len);
48 return (end) ? (Py_ssize_t)(end - l->start) : l->len; 49 return (end) ? (Py_ssize_t)(end - l->start) : l->len;
49 } 50 }
50 51
51 /* get the node value of a single line */ 52 /* get the node value of a single line */
52 static PyObject *nodeof(line *l, char *flag) 53 static PyObject *nodeof(Py_ssize_t nodelen, line *l, char *flag)
53 { 54 {
54 char *s = l->start; 55 char *s = l->start;
55 Py_ssize_t llen = pathlen(l); 56 Py_ssize_t llen = pathlen(l);
56 Py_ssize_t hlen = l->len - llen - 2; 57 Py_ssize_t hlen = l->len - llen - 2;
57 Py_ssize_t hlen_raw;
58 PyObject *hash; 58 PyObject *hash;
59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */ 59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
60 PyErr_SetString(PyExc_ValueError, "manifest line too short"); 60 PyErr_SetString(PyExc_ValueError, "manifest line too short");
61 return NULL; 61 return NULL;
62 } 62 }
71 default: 71 default:
72 *flag = '\0'; 72 *flag = '\0';
73 break; 73 break;
74 } 74 }
75 75
76 switch (hlen) { 76 if (hlen != 2 * nodelen) {
77 case 40: /* sha1 */
78 hlen_raw = 20;
79 break;
80 case 64: /* new hash */
81 hlen_raw = 32;
82 break;
83 default:
84 PyErr_SetString(PyExc_ValueError, "invalid node length in manifest"); 77 PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
85 return NULL; 78 return NULL;
86 } 79 }
87 hash = unhexlify(s + llen + 1, hlen_raw * 2); 80 hash = unhexlify(s + llen + 1, nodelen * 2);
88 if (!hash) { 81 if (!hash) {
89 return NULL; 82 return NULL;
90 } 83 }
91 if (l->hash_suffix != '\0') { 84 if (l->hash_suffix != '\0') {
92 char newhash[33]; 85 char newhash[33];
93 memcpy(newhash, PyBytes_AsString(hash), hlen_raw); 86 memcpy(newhash, PyBytes_AsString(hash), nodelen);
94 Py_DECREF(hash); 87 Py_DECREF(hash);
95 newhash[hlen_raw] = l->hash_suffix; 88 newhash[nodelen] = l->hash_suffix;
96 hash = PyBytes_FromStringAndSize(newhash, hlen_raw+1); 89 hash = PyBytes_FromStringAndSize(newhash, nodelen + 1);
97 } 90 }
98 return hash; 91 return hash;
99 } 92 }
100 93
101 /* get the node hash and flags of a line as a tuple */ 94 /* get the node hash and flags of a line as a tuple */
102 static PyObject *hashflags(line *l) 95 static PyObject *hashflags(Py_ssize_t nodelen, line *l)
103 { 96 {
104 char flag; 97 char flag;
105 PyObject *hash = nodeof(l, &flag); 98 PyObject *hash = nodeof(nodelen, l, &flag);
106 PyObject *flags; 99 PyObject *flags;
107 PyObject *tup; 100 PyObject *tup;
108 101
109 if (!hash) 102 if (!hash)
110 return NULL; 103 return NULL;
188 } 181 }
189 182
190 static int lazymanifest_init(lazymanifest *self, PyObject *args) 183 static int lazymanifest_init(lazymanifest *self, PyObject *args)
191 { 184 {
192 char *data; 185 char *data;
193 Py_ssize_t len; 186 Py_ssize_t nodelen, len;
194 int err, ret; 187 int err, ret;
195 PyObject *pydata; 188 PyObject *pydata;
196 189
197 lazymanifest_init_early(self); 190 lazymanifest_init_early(self);
198 if (!PyArg_ParseTuple(args, "S", &pydata)) { 191 if (!PyArg_ParseTuple(args, "nS", &nodelen, &pydata)) {
199 return -1; 192 return -1;
200 } 193 }
194 if (nodelen != 20 && nodelen != 32) {
195 /* See fixed buffer in nodeof */
196 PyErr_Format(PyExc_ValueError, "Unsupported node length");
197 return -1;
198 }
199 self->nodelen = nodelen;
200 self->dirty = false;
201
201 err = PyBytes_AsStringAndSize(pydata, &data, &len); 202 err = PyBytes_AsStringAndSize(pydata, &data, &len);
202
203 self->dirty = false;
204 if (err == -1) 203 if (err == -1)
205 return -1; 204 return -1;
206 self->pydata = pydata; 205 self->pydata = pydata;
207 Py_INCREF(self->pydata); 206 Py_INCREF(self->pydata);
208 Py_BEGIN_ALLOW_THREADS 207 Py_BEGIN_ALLOW_THREADS
289 return self->m->lines + self->pos; 288 return self->m->lines + self->pos;
290 } 289 }
291 290
292 static PyObject *lmiter_iterentriesnext(PyObject *o) 291 static PyObject *lmiter_iterentriesnext(PyObject *o)
293 { 292 {
293 lmIter *self = (lmIter *)o;
294 Py_ssize_t pl; 294 Py_ssize_t pl;
295 line *l; 295 line *l;
296 char flag; 296 char flag;
297 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; 297 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
298 l = lmiter_nextline((lmIter *)o); 298 l = lmiter_nextline(self);
299 if (!l) { 299 if (!l) {
300 goto done; 300 goto done;
301 } 301 }
302 pl = pathlen(l); 302 pl = pathlen(l);
303 path = PyBytes_FromStringAndSize(l->start, pl); 303 path = PyBytes_FromStringAndSize(l->start, pl);
304 hash = nodeof(l, &flag); 304 hash = nodeof(self->m->nodelen, l, &flag);
305 if (!path || !hash) { 305 if (!path || !hash) {
306 goto done; 306 goto done;
307 } 307 }
308 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0); 308 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
309 if (!flags) { 309 if (!flags) {
469 &linecmp); 469 &linecmp);
470 if (!hit || hit->deleted) { 470 if (!hit || hit->deleted) {
471 PyErr_Format(PyExc_KeyError, "No such manifest entry."); 471 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
472 return NULL; 472 return NULL;
473 } 473 }
474 return hashflags(hit); 474 return hashflags(self->nodelen, hit);
475 } 475 }
476 476
477 static int lazymanifest_delitem(lazymanifest *self, PyObject *key) 477 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
478 { 478 {
479 line needle; 479 line needle;
566 } 566 }
567 567
568 pyhash = PyTuple_GetItem(value, 0); 568 pyhash = PyTuple_GetItem(value, 0);
569 if (!PyBytes_Check(pyhash)) { 569 if (!PyBytes_Check(pyhash)) {
570 PyErr_Format(PyExc_TypeError, 570 PyErr_Format(PyExc_TypeError,
571 "node must be a 20 or 32 bytes string"); 571 "node must be a %zi bytes string", self->nodelen);
572 return -1; 572 return -1;
573 } 573 }
574 hlen = PyBytes_Size(pyhash); 574 hlen = PyBytes_Size(pyhash);
575 if (hlen != 20 && hlen != 32) { 575 if (hlen != self->nodelen) {
576 PyErr_Format(PyExc_TypeError, 576 PyErr_Format(PyExc_TypeError,
577 "node must be a 20 or 32 bytes string"); 577 "node must be a %zi bytes string", self->nodelen);
578 return -1; 578 return -1;
579 } 579 }
580 hash = PyBytes_AsString(pyhash); 580 hash = PyBytes_AsString(pyhash);
581 581
582 pyflags = PyTuple_GetItem(value, 1); 582 pyflags = PyTuple_GetItem(value, 1);
737 copy = PyObject_New(lazymanifest, &lazymanifestType); 737 copy = PyObject_New(lazymanifest, &lazymanifestType);
738 if (!copy) { 738 if (!copy) {
739 goto nomem; 739 goto nomem;
740 } 740 }
741 lazymanifest_init_early(copy); 741 lazymanifest_init_early(copy);
742 copy->nodelen = self->nodelen;
742 copy->numlines = self->numlines; 743 copy->numlines = self->numlines;
743 copy->livelines = self->livelines; 744 copy->livelines = self->livelines;
744 copy->dirty = false; 745 copy->dirty = false;
745 copy->lines = malloc(self->maxlines *sizeof(line)); 746 copy->lines = malloc(self->maxlines *sizeof(line));
746 if (!copy->lines) { 747 if (!copy->lines) {
775 copy = PyObject_New(lazymanifest, &lazymanifestType); 776 copy = PyObject_New(lazymanifest, &lazymanifestType);
776 if (!copy) { 777 if (!copy) {
777 goto nomem; 778 goto nomem;
778 } 779 }
779 lazymanifest_init_early(copy); 780 lazymanifest_init_early(copy);
781 copy->nodelen = self->nodelen;
780 copy->dirty = true; 782 copy->dirty = true;
781 copy->lines = malloc(self->maxlines * sizeof(line)); 783 copy->lines = malloc(self->maxlines * sizeof(line));
782 if (!copy->lines) { 784 if (!copy->lines) {
783 goto nomem; 785 goto nomem;
784 } 786 }
870 PyBytes_FromString(left->start) : 872 PyBytes_FromString(left->start) :
871 PyBytes_FromString(right->start); 873 PyBytes_FromString(right->start);
872 if (!key) 874 if (!key)
873 goto nomem; 875 goto nomem;
874 if (result < 0) { 876 if (result < 0) {
875 PyObject *l = hashflags(left); 877 PyObject *l = hashflags(self->nodelen, left);
876 if (!l) { 878 if (!l) {
877 goto nomem; 879 goto nomem;
878 } 880 }
879 outer = PyTuple_Pack(2, l, emptyTup); 881 outer = PyTuple_Pack(2, l, emptyTup);
880 Py_DECREF(l); 882 Py_DECREF(l);
883 } 885 }
884 PyDict_SetItem(ret, key, outer); 886 PyDict_SetItem(ret, key, outer);
885 Py_DECREF(outer); 887 Py_DECREF(outer);
886 sneedle++; 888 sneedle++;
887 } else if (result > 0) { 889 } else if (result > 0) {
888 PyObject *r = hashflags(right); 890 PyObject *r = hashflags(self->nodelen, right);
889 if (!r) { 891 if (!r) {
890 goto nomem; 892 goto nomem;
891 } 893 }
892 outer = PyTuple_Pack(2, emptyTup, r); 894 outer = PyTuple_Pack(2, emptyTup, r);
893 Py_DECREF(r); 895 Py_DECREF(r);
900 } else { 902 } else {
901 /* file exists in both manifests */ 903 /* file exists in both manifests */
902 if (left->len != right->len 904 if (left->len != right->len
903 || memcmp(left->start, right->start, left->len) 905 || memcmp(left->start, right->start, left->len)
904 || left->hash_suffix != right->hash_suffix) { 906 || left->hash_suffix != right->hash_suffix) {
905 PyObject *l = hashflags(left); 907 PyObject *l = hashflags(self->nodelen, left);
906 PyObject *r; 908 PyObject *r;
907 if (!l) { 909 if (!l) {
908 goto nomem; 910 goto nomem;
909 } 911 }
910 r = hashflags(right); 912 r = hashflags(self->nodelen, right);
911 if (!r) { 913 if (!r) {
912 Py_DECREF(l); 914 Py_DECREF(l);
913 goto nomem; 915 goto nomem;
914 } 916 }
915 outer = PyTuple_Pack(2, l, r); 917 outer = PyTuple_Pack(2, l, r);