# HG changeset patch # User Bernhard Leiner # Date 1224198218 -7200 # Node ID 1ca878d7b849bac64c5034798133678ea14b7067 # Parent 125c8fedcbe0f4448b9f28fa7ecccfbcebc4a701 C implementation of revlog index parsing diff -r 125c8fedcbe0 -r 1ca878d7b849 mercurial/parsers.c --- a/mercurial/parsers.c Fri Oct 17 11:34:31 2008 -0700 +++ b/mercurial/parsers.c Fri Oct 17 01:03:38 2008 +0200 @@ -234,11 +234,178 @@ return ret; } + +static inline uint64_t ntohll(uint64_t x) +{ + return (((uint64_t)ntohl((uint32_t)x)) << 32) | + (uint64_t)ntohl((uint32_t)(x >> 32)); +} + +const char nullid[20]; +const int nullrev = -1; + +/* RevlogNG format (all in big endian, data may be inlined): + * 6 bytes: offset + * 2 bytes: flags + * 4 bytes: compressed length + * 4 bytes: uncompressed length + * 4 bytes: base revision + * 4 bytes: link revision + * 4 bytes: parent 1 revision + * 4 bytes: parent 2 revision + * 32 bytes: nodeid (only 20 bytes used) + */ +static int _parse_index_ng (const char *data, int size, int inlined, + PyObject *index, PyObject *nodemap) +{ + PyObject *entry = NULL, *node_id = NULL, *n_obj = NULL; + PyObject *nullrev_obj = NULL, *nullid_obj = NULL; + int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + uint64_t offset_flags; + int n = 0; + const char *end = data + size; + + while (data < end) { + offset_flags = ntohll(*((uint64_t *) data)); + if (n == 0) /* mask out version number for the first entry */ + offset_flags &= 0xFFFF; + + comp_len = ntohl(*((uint32_t *) (data + 8))); + uncomp_len = ntohl(*((uint32_t *) (data + 12))); + base_rev = ntohl(*((uint32_t *) (data + 16))); + link_rev = ntohl(*((uint32_t *) (data + 20))); + parent_1 = ntohl(*((uint32_t *) (data + 24))); + parent_2 = ntohl(*((uint32_t *) (data + 28))); + node_id = PyString_FromStringAndSize(data + 32, 20); + n_obj = PyInt_FromLong(n); + if (!node_id || !n_obj || + PyDict_SetItem(nodemap, node_id, n_obj) != 0) + goto quit; + Py_DECREF(n_obj); + + entry = Py_BuildValue("LiiiiiiN", offset_flags, comp_len, + uncomp_len, base_rev, link_rev, + parent_1, parent_2, node_id); + PyObject_GC_UnTrack(entry); /* don't waste time with this */ + if (!entry) + goto quit; + + /* append to or set value in the index list */ + if (inlined) { + if (PyList_Append(index, entry) != 0) + goto quit; + Py_DECREF(entry); + } else { + PyList_SET_ITEM(index, n, entry); /* steals reference */ + } + + data += 64 + (inlined ? comp_len : 0); + n++; + } + if (data > end) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, "corrupt index file"); + goto quit; + } + + /* create the nullid/nullrev entry in the nodemap and the + * magic nullid entry in the index at [-1] */ + nullid_obj = PyString_FromStringAndSize(nullid, 20); + nullrev_obj = PyInt_FromLong(nullrev); + if (!nodemap || !nullid_obj || !nullrev_obj || + PyDict_SetItem(nodemap, nullid_obj, nullrev_obj) != 0) + goto quit; + Py_DECREF(nullrev_obj); + + entry = Py_BuildValue("iiiiiiiN", 0, 0, 0, -1, -1, -1, -1, nullid_obj); + PyObject_GC_UnTrack(entry); /* don't waste time with this */ + if (!entry) + goto quit; + if (inlined) { + if (PyList_Append(index, entry) != 0) + goto quit; + Py_DECREF(entry); + } else { + PyList_SET_ITEM(index, n, entry); /* steals reference */ + } + + return 1; + +quit: + Py_XDECREF(n_obj); + Py_XDECREF(node_id); + Py_XDECREF(entry); + Py_XDECREF(nullrev_obj); + Py_XDECREF(nullid_obj); + return 0; +} + + + +/* This function parses a index file and returns a Python tuple of the + * following format: (index, nodemap, cache) + * + * index: a list of tuples containing the RevlogNG records + * nodemap: a dict mapping node ids to indices in the index list + * cache: if data is inlined, a tuple (index_file_content, 0) else None + */ +static PyObject *parse_index(PyObject *self, PyObject *args) +{ + const char *data; + int size, inlined; + PyObject *rval = NULL, *index = NULL, *nodemap = NULL, *cache = NULL; + PyObject *data_obj = NULL, *inlined_obj; + + if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj)) + return NULL; + inlined = inlined_obj && PyObject_IsTrue(inlined_obj); + + /* If no data is inlined, we know the size of the index list in + * advance: size divided by size of one one revlog record (64 bytes) + * plus one for the nullid */ + index = inlined ? PyList_New(0) : PyList_New(size / 64 + 1); + if (!index) + goto quit; + + nodemap = PyDict_New(); + + /* set up the cache return value */ + if (inlined) { + /* Note that the reference to data_obj is only borrowed */ + data_obj = PyTuple_GET_ITEM(args, 0); + cache = Py_BuildValue("iO", 0, data_obj); + if (!cache) + goto quit; + } else { + cache = Py_None; + Py_INCREF(Py_None); + } + + /* actually populate the index and the nodemap with data */ + if (!_parse_index_ng (data, size, inlined, index, nodemap)) + goto quit; + + rval = Py_BuildValue("NNN", index, nodemap, cache); + if (!rval) + goto quit; + return rval; + +quit: + Py_XDECREF(index); + Py_XDECREF(nodemap); + Py_XDECREF(cache); + Py_XDECREF(rval); + Py_XDECREF(data_obj); + return NULL; +} + + static char parsers_doc[] = "Efficient content parsing."; static PyMethodDef methods[] = { {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, + {"parse_index", parse_index, METH_VARARGS, "parse a revlog index\n"}, {NULL, NULL} };