diff mercurial/cext/manifest.c @ 47043:12450fbea288

manifests: push down expected node length into the parser This strictly enforces the node length in the manifest lines according to what the repository expects. One test case moves large hash testing into the non-treemanifest part as treemanifests don't provide an interface for overriding just the node length for now. Differential Revision: https://phab.mercurial-scm.org/D10533
author Joerg Sonnenberger <joerg@bec.de>
date Fri, 30 Apr 2021 02:11:58 +0200
parents 9719e118e4af
children be3af7eb2bbb
line wrap: on
line diff
--- a/mercurial/cext/manifest.c	Fri Apr 30 03:19:45 2021 +0200
+++ b/mercurial/cext/manifest.c	Fri Apr 30 02:11:58 2021 +0200
@@ -28,6 +28,7 @@
 typedef struct {
 	PyObject_HEAD
 	PyObject *pydata;
+	Py_ssize_t nodelen;
 	line *lines;
 	int numlines; /* number of line entries */
 	int livelines; /* number of non-deleted lines */
@@ -49,12 +50,11 @@
 }
 
 /* get the node value of a single line */
-static PyObject *nodeof(line *l, char *flag)
+static PyObject *nodeof(Py_ssize_t nodelen, line *l, char *flag)
 {
 	char *s = l->start;
 	Py_ssize_t llen = pathlen(l);
 	Py_ssize_t hlen = l->len - llen - 2;
-	Py_ssize_t hlen_raw;
 	PyObject *hash;
 	if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
 		PyErr_SetString(PyExc_ValueError, "manifest line too short");
@@ -73,36 +73,29 @@
 		break;
 	}
 
-	switch (hlen) {
-	case 40: /* sha1 */
-		hlen_raw = 20;
-		break;
-	case 64: /* new hash */
-		hlen_raw = 32;
-		break;
-	default:
+	if (hlen != 2 * nodelen) {
 		PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
 		return NULL;
 	}
-	hash = unhexlify(s + llen + 1, hlen_raw * 2);
+	hash = unhexlify(s + llen + 1, nodelen * 2);
 	if (!hash) {
 		return NULL;
 	}
 	if (l->hash_suffix != '\0') {
 		char newhash[33];
-		memcpy(newhash, PyBytes_AsString(hash), hlen_raw);
+		memcpy(newhash, PyBytes_AsString(hash), nodelen);
 		Py_DECREF(hash);
-		newhash[hlen_raw] = l->hash_suffix;
-		hash = PyBytes_FromStringAndSize(newhash, hlen_raw+1);
+		newhash[nodelen] = l->hash_suffix;
+		hash = PyBytes_FromStringAndSize(newhash, nodelen + 1);
 	}
 	return hash;
 }
 
 /* get the node hash and flags of a line as a tuple */
-static PyObject *hashflags(line *l)
+static PyObject *hashflags(Py_ssize_t nodelen, line *l)
 {
 	char flag;
-	PyObject *hash = nodeof(l, &flag);
+	PyObject *hash = nodeof(nodelen, l, &flag);
 	PyObject *flags;
 	PyObject *tup;
 
@@ -190,17 +183,23 @@
 static int lazymanifest_init(lazymanifest *self, PyObject *args)
 {
 	char *data;
-	Py_ssize_t len;
+	Py_ssize_t nodelen, len;
 	int err, ret;
 	PyObject *pydata;
 
 	lazymanifest_init_early(self);
-	if (!PyArg_ParseTuple(args, "S", &pydata)) {
+	if (!PyArg_ParseTuple(args, "nS", &nodelen, &pydata)) {
 		return -1;
 	}
-	err = PyBytes_AsStringAndSize(pydata, &data, &len);
+	if (nodelen != 20 && nodelen != 32) {
+		/* See fixed buffer in nodeof */
+		PyErr_Format(PyExc_ValueError, "Unsupported node length");
+		return -1;
+	}
+	self->nodelen = nodelen;
+	self->dirty = false;
 
-	self->dirty = false;
+	err = PyBytes_AsStringAndSize(pydata, &data, &len);
 	if (err == -1)
 		return -1;
 	self->pydata = pydata;
@@ -291,17 +290,18 @@
 
 static PyObject *lmiter_iterentriesnext(PyObject *o)
 {
+	lmIter *self = (lmIter *)o;
 	Py_ssize_t pl;
 	line *l;
 	char flag;
 	PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
-	l = lmiter_nextline((lmIter *)o);
+	l = lmiter_nextline(self);
 	if (!l) {
 		goto done;
 	}
 	pl = pathlen(l);
 	path = PyBytes_FromStringAndSize(l->start, pl);
-	hash = nodeof(l, &flag);
+	hash = nodeof(self->m->nodelen, l, &flag);
 	if (!path || !hash) {
 		goto done;
 	}
@@ -471,7 +471,7 @@
 		PyErr_Format(PyExc_KeyError, "No such manifest entry.");
 		return NULL;
 	}
-	return hashflags(hit);
+	return hashflags(self->nodelen, hit);
 }
 
 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
@@ -568,13 +568,13 @@
 	pyhash = PyTuple_GetItem(value, 0);
 	if (!PyBytes_Check(pyhash)) {
 		PyErr_Format(PyExc_TypeError,
-			     "node must be a 20 or 32 bytes string");
+			     "node must be a %zi bytes string", self->nodelen);
 		return -1;
 	}
 	hlen = PyBytes_Size(pyhash);
-	if (hlen != 20 && hlen != 32) {
+	if (hlen != self->nodelen) {
 		PyErr_Format(PyExc_TypeError,
-			     "node must be a 20 or 32 bytes string");
+			     "node must be a %zi bytes string", self->nodelen);
 		return -1;
 	}
 	hash = PyBytes_AsString(pyhash);
@@ -739,6 +739,7 @@
 		goto nomem;
 	}
 	lazymanifest_init_early(copy);
+	copy->nodelen = self->nodelen;
 	copy->numlines = self->numlines;
 	copy->livelines = self->livelines;
 	copy->dirty = false;
@@ -777,6 +778,7 @@
 		goto nomem;
 	}
 	lazymanifest_init_early(copy);
+	copy->nodelen = self->nodelen;
 	copy->dirty = true;
 	copy->lines = malloc(self->maxlines * sizeof(line));
 	if (!copy->lines) {
@@ -872,7 +874,7 @@
 		if (!key)
 			goto nomem;
 		if (result < 0) {
-			PyObject *l = hashflags(left);
+			PyObject *l = hashflags(self->nodelen, left);
 			if (!l) {
 				goto nomem;
 			}
@@ -885,7 +887,7 @@
 			Py_DECREF(outer);
 			sneedle++;
 		} else if (result > 0) {
-			PyObject *r = hashflags(right);
+			PyObject *r = hashflags(self->nodelen, right);
 			if (!r) {
 				goto nomem;
 			}
@@ -902,12 +904,12 @@
 			if (left->len != right->len
 			    || memcmp(left->start, right->start, left->len)
 			    || left->hash_suffix != right->hash_suffix) {
-				PyObject *l = hashflags(left);
+				PyObject *l = hashflags(self->nodelen, left);
 				PyObject *r;
 				if (!l) {
 					goto nomem;
 				}
-				r = hashflags(right);
+				r = hashflags(self->nodelen, right);
 				if (!r) {
 					Py_DECREF(l);
 					goto nomem;