Mercurial > hg
changeset 32372:df448de7cf3b
parsers: switch to policy importer
# no-check-commit
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 13 Aug 2016 12:23:56 +0900 |
parents | 151cc3b3d799 |
children | 5700825889fb |
files | contrib/check-py3-compat.py contrib/import-checker.py contrib/wix/dist.wxs mercurial/__init__.py mercurial/cext/dirs.c mercurial/cext/manifest.c mercurial/cext/parsers.c mercurial/cext/pathencode.c mercurial/dirs.c mercurial/dirstate.py mercurial/encoding.py mercurial/manifest.c mercurial/manifest.py mercurial/obsolete.py mercurial/parsers.c mercurial/pathencode.c mercurial/pure/parsers.py mercurial/revlog.py mercurial/store.py mercurial/util.py setup.py tests/fakedirstatewritetime.py tests/test-parseindex2.py |
diffstat | 23 files changed, 4998 insertions(+), 4987 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/check-py3-compat.py Sat Aug 13 12:18:58 2016 +0900 +++ b/contrib/check-py3-compat.py Sat Aug 13 12:23:56 2016 +0900 @@ -17,7 +17,6 @@ # Modules that have both Python and C implementations. _dualmodules = ( - 'parsers.py', ) def check_compat_py2(f):
--- a/contrib/import-checker.py Sat Aug 13 12:18:58 2016 +0900 +++ b/contrib/import-checker.py Sat Aug 13 12:23:56 2016 +0900 @@ -26,7 +26,6 @@ # Modules that have both Python and C implementations. _dualmodules = ( - 'parsers.py', ) # Modules that must be aliased because they are commonly confused with
--- a/contrib/wix/dist.wxs Sat Aug 13 12:18:58 2016 +0900 +++ b/contrib/wix/dist.wxs Sat Aug 13 12:23:56 2016 +0900 @@ -17,7 +17,7 @@ <File Name="mercurial.cext.diffhelpers.pyd" /> <File Name="mercurial.cext.mpatch.pyd" /> <File Name="mercurial.cext.osutil.pyd" /> - <File Name="mercurial.parsers.pyd" /> + <File Name="mercurial.cext.parsers.pyd" /> <File Name="pyexpat.pyd" /> <File Name="bz2.pyd" /> <File Name="select.pyd" />
--- a/mercurial/__init__.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/__init__.py Sat Aug 13 12:23:56 2016 +0900 @@ -23,7 +23,6 @@ # Modules that have both Python and C implementations. See also the # set of .py files under mercurial/pure/. _dualmodules = { - 'mercurial.parsers', } class hgimporter(object):
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/cext/dirs.c Sat Aug 13 12:23:56 2016 +0900 @@ -0,0 +1,315 @@ +/* + dirs.c - dynamic directory diddling for dirstates + + Copyright 2013 Facebook + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. +*/ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include "util.h" + +#ifdef IS_PY3K +#define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1] +#else +#define PYLONG_VALUE(o) PyInt_AS_LONG(o) +#endif + +/* + * This is a multiset of directory names, built from the files that + * appear in a dirstate or manifest. + * + * A few implementation notes: + * + * We modify Python integers for refcounting, but those integers are + * never visible to Python code. + * + * We mutate strings in-place, but leave them immutable once they can + * be seen by Python code. + */ +typedef struct { + PyObject_HEAD + PyObject *dict; +} dirsObject; + +static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos) +{ + while (pos != -1) { + if (path[pos] == '/') + break; + pos -= 1; + } + + return pos; +} + +static int _addpath(PyObject *dirs, PyObject *path) +{ + const char *cpath = PyBytes_AS_STRING(path); + Py_ssize_t pos = PyBytes_GET_SIZE(path); + PyObject *key = NULL; + int ret = -1; + + /* This loop is super critical for performance. That's why we inline + * access to Python structs instead of going through a supported API. + * The implementation, therefore, is heavily dependent on CPython + * implementation details. We also commit violations of the Python + * "protocol" such as mutating immutable objects. But since we only + * mutate objects created in this function or in other well-defined + * locations, the references are known so these violations should go + * unnoticed. The code for adjusting the length of a PyBytesObject is + * essentially a minimal version of _PyBytes_Resize. */ + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; + + /* It's likely that every prefix already has an entry + in our dict. Try to avoid allocating and + deallocating a string for each prefix we check. */ + if (key != NULL) + ((PyBytesObject *)key)->ob_shash = -1; + else { + /* Force Python to not reuse a small shared string. */ + key = PyBytes_FromStringAndSize(cpath, + pos < 2 ? 2 : pos); + if (key == NULL) + goto bail; + } + /* Py_SIZE(o) refers to the ob_size member of the struct. Yes, + * assigning to what looks like a function seems wrong. */ + Py_SIZE(key) = pos; + ((PyBytesObject *)key)->ob_sval[pos] = '\0'; + + val = PyDict_GetItem(dirs, key); + if (val != NULL) { + PYLONG_VALUE(val) += 1; + break; + } + + /* Force Python to not reuse a small shared int. */ +#ifdef IS_PY3K + val = PyLong_FromLong(0x1eadbeef); +#else + val = PyInt_FromLong(0x1eadbeef); +#endif + + if (val == NULL) + goto bail; + + PYLONG_VALUE(val) = 1; + ret = PyDict_SetItem(dirs, key, val); + Py_DECREF(val); + if (ret == -1) + goto bail; + Py_CLEAR(key); + } + ret = 0; + +bail: + Py_XDECREF(key); + + return ret; +} + +static int _delpath(PyObject *dirs, PyObject *path) +{ + char *cpath = PyBytes_AS_STRING(path); + Py_ssize_t pos = PyBytes_GET_SIZE(path); + PyObject *key = NULL; + int ret = -1; + + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; + + key = PyBytes_FromStringAndSize(cpath, pos); + + if (key == NULL) + goto bail; + + val = PyDict_GetItem(dirs, key); + if (val == NULL) { + PyErr_SetString(PyExc_ValueError, + "expected a value, found none"); + goto bail; + } + + if (--PYLONG_VALUE(val) <= 0) { + if (PyDict_DelItem(dirs, key) == -1) + goto bail; + } else + break; + Py_CLEAR(key); + } + ret = 0; + +bail: + Py_XDECREF(key); + + return ret; +} + +static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar) +{ + PyObject *key, *value; + Py_ssize_t pos = 0; + + while (PyDict_Next(source, &pos, &key, &value)) { + if (!PyBytes_Check(key)) { + PyErr_SetString(PyExc_TypeError, "expected string key"); + return -1; + } + if (skipchar) { + if (!dirstate_tuple_check(value)) { + PyErr_SetString(PyExc_TypeError, + "expected a dirstate tuple"); + return -1; + } + if (((dirstateTupleObject *)value)->state == skipchar) + continue; + } + + if (_addpath(dirs, key) == -1) + return -1; + } + + return 0; +} + +static int dirs_fromiter(PyObject *dirs, PyObject *source) +{ + PyObject *iter, *item = NULL; + int ret; + + iter = PyObject_GetIter(source); + if (iter == NULL) + return -1; + + while ((item = PyIter_Next(iter)) != NULL) { + if (!PyBytes_Check(item)) { + PyErr_SetString(PyExc_TypeError, "expected string"); + break; + } + + if (_addpath(dirs, item) == -1) + break; + Py_CLEAR(item); + } + + ret = PyErr_Occurred() ? -1 : 0; + Py_DECREF(iter); + Py_XDECREF(item); + return ret; +} + +/* + * Calculate a refcounted set of directory names for the files in a + * dirstate. + */ +static int dirs_init(dirsObject *self, PyObject *args) +{ + PyObject *dirs = NULL, *source = NULL; + char skipchar = 0; + int ret = -1; + + self->dict = NULL; + + if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar)) + return -1; + + dirs = PyDict_New(); + + if (dirs == NULL) + return -1; + + if (source == NULL) + ret = 0; + else if (PyDict_Check(source)) + ret = dirs_fromdict(dirs, source, skipchar); + else if (skipchar) + PyErr_SetString(PyExc_ValueError, + "skip character is only supported " + "with a dict source"); + else + ret = dirs_fromiter(dirs, source); + + if (ret == -1) + Py_XDECREF(dirs); + else + self->dict = dirs; + + return ret; +} + +PyObject *dirs_addpath(dirsObject *self, PyObject *args) +{ + PyObject *path; + + if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path)) + return NULL; + + if (_addpath(self->dict, path) == -1) + return NULL; + + Py_RETURN_NONE; +} + +static PyObject *dirs_delpath(dirsObject *self, PyObject *args) +{ + PyObject *path; + + if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path)) + return NULL; + + if (_delpath(self->dict, path) == -1) + return NULL; + + Py_RETURN_NONE; +} + +static int dirs_contains(dirsObject *self, PyObject *value) +{ + return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0; +} + +static void dirs_dealloc(dirsObject *self) +{ + Py_XDECREF(self->dict); + PyObject_Del(self); +} + +static PyObject *dirs_iter(dirsObject *self) +{ + return PyObject_GetIter(self->dict); +} + +static PySequenceMethods dirs_sequence_methods; + +static PyMethodDef dirs_methods[] = { + {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"}, + {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) }; + +void dirs_module_init(PyObject *mod) +{ + dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains; + dirsType.tp_name = "parsers.dirs"; + dirsType.tp_new = PyType_GenericNew; + dirsType.tp_basicsize = sizeof(dirsObject); + dirsType.tp_dealloc = (destructor)dirs_dealloc; + dirsType.tp_as_sequence = &dirs_sequence_methods; + dirsType.tp_flags = Py_TPFLAGS_DEFAULT; + dirsType.tp_doc = "dirs"; + dirsType.tp_iter = (getiterfunc)dirs_iter; + dirsType.tp_methods = dirs_methods; + dirsType.tp_init = (initproc)dirs_init; + + if (PyType_Ready(&dirsType) < 0) + return; + Py_INCREF(&dirsType); + + PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/cext/manifest.c Sat Aug 13 12:23:56 2016 +0900 @@ -0,0 +1,939 @@ +/* + * manifest.c - manifest type that does on-demand parsing. + * + * Copyright 2015, Google Inc. + * + * This software may be used and distributed according to the terms of + * the GNU General Public License, incorporated herein by reference. + */ +#include <Python.h> + +#include <assert.h> +#include <string.h> +#include <stdlib.h> + +#include "util.h" + +#define DEFAULT_LINES 100000 + +typedef struct { + char *start; + Py_ssize_t len; /* length of line including terminal newline */ + char hash_suffix; + bool from_malloc; + bool deleted; +} line; + +typedef struct { + PyObject_HEAD + PyObject *pydata; + line *lines; + int numlines; /* number of line entries */ + int livelines; /* number of non-deleted lines */ + int maxlines; /* allocated number of lines */ + bool dirty; +} lazymanifest; + +#define MANIFEST_OOM -1 +#define MANIFEST_NOT_SORTED -2 +#define MANIFEST_MALFORMED -3 + +/* defined in parsers.c */ +PyObject *unhexlify(const char *str, int len); + +/* get the length of the path for a line */ +static size_t pathlen(line *l) { + return strlen(l->start); +} + +/* get the node value of a single line */ +static PyObject *nodeof(line *l) { + char *s = l->start; + ssize_t llen = pathlen(l); + PyObject *hash = unhexlify(s + llen + 1, 40); + if (!hash) { + return NULL; + } + if (l->hash_suffix != '\0') { + char newhash[21]; + memcpy(newhash, PyBytes_AsString(hash), 20); + Py_DECREF(hash); + newhash[20] = l->hash_suffix; + hash = PyBytes_FromStringAndSize(newhash, 21); + } + return hash; +} + +/* get the node hash and flags of a line as a tuple */ +static PyObject *hashflags(line *l) +{ + char *s = l->start; + size_t plen = pathlen(l); + PyObject *hash = nodeof(l); + + /* 40 for hash, 1 for null byte, 1 for newline */ + size_t hplen = plen + 42; + Py_ssize_t flen = l->len - hplen; + PyObject *flags; + PyObject *tup; + + if (!hash) + return NULL; + flags = PyBytes_FromStringAndSize(s + hplen - 1, flen); + if (!flags) { + Py_DECREF(hash); + return NULL; + } + tup = PyTuple_Pack(2, hash, flags); + Py_DECREF(flags); + Py_DECREF(hash); + return tup; +} + +/* if we're about to run out of space in the line index, add more */ +static bool realloc_if_full(lazymanifest *self) +{ + if (self->numlines == self->maxlines) { + self->maxlines *= 2; + self->lines = realloc(self->lines, self->maxlines * sizeof(line)); + } + return !!self->lines; +} + +/* + * Find the line boundaries in the manifest that 'data' points to and store + * information about each line in 'self'. + */ +static int find_lines(lazymanifest *self, char *data, Py_ssize_t len) +{ + char *prev = NULL; + while (len > 0) { + line *l; + char *next = memchr(data, '\n', len); + if (!next) { + return MANIFEST_MALFORMED; + } + next++; /* advance past newline */ + if (!realloc_if_full(self)) { + return MANIFEST_OOM; /* no memory */ + } + if (prev && strcmp(prev, data) > -1) { + /* This data isn't sorted, so we have to abort. */ + return MANIFEST_NOT_SORTED; + } + l = self->lines + ((self->numlines)++); + l->start = data; + l->len = next - data; + l->hash_suffix = '\0'; + l->from_malloc = false; + l->deleted = false; + len = len - l->len; + prev = data; + data = next; + } + self->livelines = self->numlines; + return 0; +} + +static int lazymanifest_init(lazymanifest *self, PyObject *args) +{ + char *data; + Py_ssize_t len; + int err, ret; + PyObject *pydata; + if (!PyArg_ParseTuple(args, "S", &pydata)) { + return -1; + } + err = PyBytes_AsStringAndSize(pydata, &data, &len); + + self->dirty = false; + if (err == -1) + return -1; + self->pydata = pydata; + Py_INCREF(self->pydata); + Py_BEGIN_ALLOW_THREADS + self->lines = malloc(DEFAULT_LINES * sizeof(line)); + self->maxlines = DEFAULT_LINES; + self->numlines = 0; + if (!self->lines) + ret = MANIFEST_OOM; + else + ret = find_lines(self, data, len); + Py_END_ALLOW_THREADS + switch (ret) { + case 0: + break; + case MANIFEST_OOM: + PyErr_NoMemory(); + break; + case MANIFEST_NOT_SORTED: + PyErr_Format(PyExc_ValueError, + "Manifest lines not in sorted order."); + break; + case MANIFEST_MALFORMED: + PyErr_Format(PyExc_ValueError, + "Manifest did not end in a newline."); + break; + default: + PyErr_Format(PyExc_ValueError, + "Unknown problem parsing manifest."); + } + return ret == 0 ? 0 : -1; +} + +static void lazymanifest_dealloc(lazymanifest *self) +{ + /* free any extra lines we had to allocate */ + int i; + for (i = 0; i < self->numlines; i++) { + if (self->lines[i].from_malloc) { + free(self->lines[i].start); + } + } + if (self->lines) { + free(self->lines); + self->lines = NULL; + } + if (self->pydata) { + Py_DECREF(self->pydata); + self->pydata = NULL; + } + PyObject_Del(self); +} + +/* iteration support */ + +typedef struct { + PyObject_HEAD lazymanifest *m; + Py_ssize_t pos; +} lmIter; + +static void lmiter_dealloc(PyObject *o) +{ + lmIter *self = (lmIter *)o; + Py_DECREF(self->m); + PyObject_Del(self); +} + +static line *lmiter_nextline(lmIter *self) +{ + do { + self->pos++; + if (self->pos >= self->m->numlines) { + return NULL; + } + /* skip over deleted manifest entries */ + } while (self->m->lines[self->pos].deleted); + return self->m->lines + self->pos; +} + +static PyObject *lmiter_iterentriesnext(PyObject *o) +{ + size_t pl; + line *l; + Py_ssize_t consumed; + PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; + l = lmiter_nextline((lmIter *)o); + if (!l) { + goto done; + } + pl = pathlen(l); + path = PyBytes_FromStringAndSize(l->start, pl); + hash = nodeof(l); + consumed = pl + 41; + flags = PyBytes_FromStringAndSize(l->start + consumed, + l->len - consumed - 1); + if (!path || !hash || !flags) { + goto done; + } + ret = PyTuple_Pack(3, path, hash, flags); +done: + Py_XDECREF(path); + Py_XDECREF(hash); + Py_XDECREF(flags); + return ret; +} + +#ifdef IS_PY3K +#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT +#else +#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \ + | Py_TPFLAGS_HAVE_ITER +#endif + +static PyTypeObject lazymanifestEntriesIterator = { + PyVarObject_HEAD_INIT(NULL, 0) + "parsers.lazymanifest.entriesiterator", /*tp_name */ + sizeof(lmIter), /*tp_basicsize */ + 0, /*tp_itemsize */ + lmiter_dealloc, /*tp_dealloc */ + 0, /*tp_print */ + 0, /*tp_getattr */ + 0, /*tp_setattr */ + 0, /*tp_compare */ + 0, /*tp_repr */ + 0, /*tp_as_number */ + 0, /*tp_as_sequence */ + 0, /*tp_as_mapping */ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */ + "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter: __iter__() method */ + lmiter_iterentriesnext, /* tp_iternext: next() method */ +}; + +static PyObject *lmiter_iterkeysnext(PyObject *o) +{ + size_t pl; + line *l = lmiter_nextline((lmIter *)o); + if (!l) { + return NULL; + } + pl = pathlen(l); + return PyBytes_FromStringAndSize(l->start, pl); +} + +#ifdef IS_PY3K +#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT +#else +#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \ + | Py_TPFLAGS_HAVE_ITER +#endif + +static PyTypeObject lazymanifestKeysIterator = { + PyVarObject_HEAD_INIT(NULL, 0) + "parsers.lazymanifest.keysiterator", /*tp_name */ + sizeof(lmIter), /*tp_basicsize */ + 0, /*tp_itemsize */ + lmiter_dealloc, /*tp_dealloc */ + 0, /*tp_print */ + 0, /*tp_getattr */ + 0, /*tp_setattr */ + 0, /*tp_compare */ + 0, /*tp_repr */ + 0, /*tp_as_number */ + 0, /*tp_as_sequence */ + 0, /*tp_as_mapping */ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */ + "Keys iterator for a lazymanifest.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter: __iter__() method */ + lmiter_iterkeysnext, /* tp_iternext: next() method */ +}; + +static lazymanifest *lazymanifest_copy(lazymanifest *self); + +static PyObject *lazymanifest_getentriesiter(lazymanifest *self) +{ + lmIter *i = NULL; + lazymanifest *t = lazymanifest_copy(self); + if (!t) { + PyErr_NoMemory(); + return NULL; + } + i = PyObject_New(lmIter, &lazymanifestEntriesIterator); + if (i) { + i->m = t; + i->pos = -1; + } else { + Py_DECREF(t); + PyErr_NoMemory(); + } + return (PyObject *)i; +} + +static PyObject *lazymanifest_getkeysiter(lazymanifest *self) +{ + lmIter *i = NULL; + lazymanifest *t = lazymanifest_copy(self); + if (!t) { + PyErr_NoMemory(); + return NULL; + } + i = PyObject_New(lmIter, &lazymanifestKeysIterator); + if (i) { + i->m = t; + i->pos = -1; + } else { + Py_DECREF(t); + PyErr_NoMemory(); + } + return (PyObject *)i; +} + +/* __getitem__ and __setitem__ support */ + +static Py_ssize_t lazymanifest_size(lazymanifest *self) +{ + return self->livelines; +} + +static int linecmp(const void *left, const void *right) +{ + return strcmp(((const line *)left)->start, + ((const line *)right)->start); +} + +static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key) +{ + line needle; + line *hit; + if (!PyBytes_Check(key)) { + PyErr_Format(PyExc_TypeError, + "getitem: manifest keys must be a string."); + return NULL; + } + needle.start = PyBytes_AsString(key); + hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), + &linecmp); + if (!hit || hit->deleted) { + PyErr_Format(PyExc_KeyError, "No such manifest entry."); + return NULL; + } + return hashflags(hit); +} + +static int lazymanifest_delitem(lazymanifest *self, PyObject *key) +{ + line needle; + line *hit; + if (!PyBytes_Check(key)) { + PyErr_Format(PyExc_TypeError, + "delitem: manifest keys must be a string."); + return -1; + } + needle.start = PyBytes_AsString(key); + hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), + &linecmp); + if (!hit || hit->deleted) { + PyErr_Format(PyExc_KeyError, + "Tried to delete nonexistent manifest entry."); + return -1; + } + self->dirty = true; + hit->deleted = true; + self->livelines--; + return 0; +} + +/* Do a binary search for the insertion point for new, creating the + * new entry if needed. */ +static int internalsetitem(lazymanifest *self, line *new) { + int start = 0, end = self->numlines; + while (start < end) { + int pos = start + (end - start) / 2; + int c = linecmp(new, self->lines + pos); + if (c < 0) + end = pos; + else if (c > 0) + start = pos + 1; + else { + if (self->lines[pos].deleted) + self->livelines++; + if (self->lines[pos].from_malloc) + free(self->lines[pos].start); + start = pos; + goto finish; + } + } + /* being here means we need to do an insert */ + if (!realloc_if_full(self)) { + PyErr_NoMemory(); + return -1; + } + memmove(self->lines + start + 1, self->lines + start, + (self->numlines - start) * sizeof(line)); + self->numlines++; + self->livelines++; +finish: + self->lines[start] = *new; + self->dirty = true; + return 0; +} + +static int lazymanifest_setitem( + lazymanifest *self, PyObject *key, PyObject *value) +{ + char *path; + Py_ssize_t plen; + PyObject *pyhash; + Py_ssize_t hlen; + char *hash; + PyObject *pyflags; + char *flags; + Py_ssize_t flen; + size_t dlen; + char *dest; + int i; + line new; + if (!PyBytes_Check(key)) { + PyErr_Format(PyExc_TypeError, + "setitem: manifest keys must be a string."); + return -1; + } + if (!value) { + return lazymanifest_delitem(self, key); + } + if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) { + PyErr_Format(PyExc_TypeError, + "Manifest values must be a tuple of (node, flags)."); + return -1; + } + if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) { + return -1; + } + + pyhash = PyTuple_GetItem(value, 0); + if (!PyBytes_Check(pyhash)) { + PyErr_Format(PyExc_TypeError, + "node must be a 20-byte string"); + return -1; + } + hlen = PyBytes_Size(pyhash); + /* Some parts of the codebase try and set 21 or 22 + * byte "hash" values in order to perturb things for + * status. We have to preserve at least the 21st + * byte. Sigh. If there's a 22nd byte, we drop it on + * the floor, which works fine. + */ + if (hlen != 20 && hlen != 21 && hlen != 22) { + PyErr_Format(PyExc_TypeError, + "node must be a 20-byte string"); + return -1; + } + hash = PyBytes_AsString(pyhash); + + pyflags = PyTuple_GetItem(value, 1); + if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) { + PyErr_Format(PyExc_TypeError, + "flags must a 0 or 1 byte string"); + return -1; + } + if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) { + return -1; + } + /* one null byte and one newline */ + dlen = plen + 41 + flen + 1; + dest = malloc(dlen); + if (!dest) { + PyErr_NoMemory(); + return -1; + } + memcpy(dest, path, plen + 1); + for (i = 0; i < 20; i++) { + /* Cast to unsigned, so it will not get sign-extended when promoted + * to int (as is done when passing to a variadic function) + */ + sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]); + } + memcpy(dest + plen + 41, flags, flen); + dest[plen + 41 + flen] = '\n'; + new.start = dest; + new.len = dlen; + new.hash_suffix = '\0'; + if (hlen > 20) { + new.hash_suffix = hash[20]; + } + new.from_malloc = true; /* is `start` a pointer we allocated? */ + new.deleted = false; /* is this entry deleted? */ + if (internalsetitem(self, &new)) { + return -1; + } + return 0; +} + +static PyMappingMethods lazymanifest_mapping_methods = { + (lenfunc)lazymanifest_size, /* mp_length */ + (binaryfunc)lazymanifest_getitem, /* mp_subscript */ + (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */ +}; + +/* sequence methods (important or __contains__ builds an iterator) */ + +static int lazymanifest_contains(lazymanifest *self, PyObject *key) +{ + line needle; + line *hit; + if (!PyBytes_Check(key)) { + /* Our keys are always strings, so if the contains + * check is for a non-string, just return false. */ + return 0; + } + needle.start = PyBytes_AsString(key); + hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), + &linecmp); + if (!hit || hit->deleted) { + return 0; + } + return 1; +} + +static PySequenceMethods lazymanifest_seq_meths = { + (lenfunc)lazymanifest_size, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + 0, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)lazymanifest_contains, /* sq_contains */ + 0, /* sq_inplace_concat */ + 0, /* sq_inplace_repeat */ +}; + + +/* Other methods (copy, diff, etc) */ +static PyTypeObject lazymanifestType; + +/* If the manifest has changes, build the new manifest text and reindex it. */ +static int compact(lazymanifest *self) { + int i; + ssize_t need = 0; + char *data; + line *src, *dst; + PyObject *pydata; + if (!self->dirty) + return 0; + for (i = 0; i < self->numlines; i++) { + if (!self->lines[i].deleted) { + need += self->lines[i].len; + } + } + pydata = PyBytes_FromStringAndSize(NULL, need); + if (!pydata) + return -1; + data = PyBytes_AsString(pydata); + if (!data) { + return -1; + } + src = self->lines; + dst = self->lines; + for (i = 0; i < self->numlines; i++, src++) { + char *tofree = NULL; + if (src->from_malloc) { + tofree = src->start; + } + if (!src->deleted) { + memcpy(data, src->start, src->len); + *dst = *src; + dst->start = data; + dst->from_malloc = false; + data += dst->len; + dst++; + } + free(tofree); + } + Py_DECREF(self->pydata); + self->pydata = pydata; + self->numlines = self->livelines; + self->dirty = false; + return 0; +} + +static PyObject *lazymanifest_text(lazymanifest *self) +{ + if (compact(self) != 0) { + PyErr_NoMemory(); + return NULL; + } + Py_INCREF(self->pydata); + return self->pydata; +} + +static lazymanifest *lazymanifest_copy(lazymanifest *self) +{ + lazymanifest *copy = NULL; + if (compact(self) != 0) { + goto nomem; + } + copy = PyObject_New(lazymanifest, &lazymanifestType); + if (!copy) { + goto nomem; + } + copy->numlines = self->numlines; + copy->livelines = self->livelines; + copy->dirty = false; + copy->lines = malloc(self->maxlines *sizeof(line)); + if (!copy->lines) { + goto nomem; + } + memcpy(copy->lines, self->lines, self->numlines * sizeof(line)); + copy->maxlines = self->maxlines; + copy->pydata = self->pydata; + Py_INCREF(copy->pydata); + return copy; +nomem: + PyErr_NoMemory(); + Py_XDECREF(copy); + return NULL; +} + +static lazymanifest *lazymanifest_filtercopy( + lazymanifest *self, PyObject *matchfn) +{ + lazymanifest *copy = NULL; + int i; + if (!PyCallable_Check(matchfn)) { + PyErr_SetString(PyExc_TypeError, "matchfn must be callable"); + return NULL; + } + /* compact ourselves first to avoid double-frees later when we + * compact tmp so that it doesn't have random pointers to our + * underlying from_malloc-data (self->pydata is safe) */ + if (compact(self) != 0) { + goto nomem; + } + copy = PyObject_New(lazymanifest, &lazymanifestType); + if (!copy) { + goto nomem; + } + copy->dirty = true; + copy->lines = malloc(self->maxlines * sizeof(line)); + if (!copy->lines) { + goto nomem; + } + copy->maxlines = self->maxlines; + copy->numlines = 0; + copy->pydata = self->pydata; + Py_INCREF(self->pydata); + for (i = 0; i < self->numlines; i++) { + PyObject *arglist = NULL, *result = NULL; + arglist = Py_BuildValue("(s)", self->lines[i].start); + if (!arglist) { + return NULL; + } + result = PyObject_CallObject(matchfn, arglist); + Py_DECREF(arglist); + /* if the callback raised an exception, just let it + * through and give up */ + if (!result) { + free(copy->lines); + Py_DECREF(self->pydata); + return NULL; + } + if (PyObject_IsTrue(result)) { + assert(!(self->lines[i].from_malloc)); + copy->lines[copy->numlines++] = self->lines[i]; + } + Py_DECREF(result); + } + copy->livelines = copy->numlines; + return copy; +nomem: + PyErr_NoMemory(); + Py_XDECREF(copy); + return NULL; +} + +static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args) +{ + lazymanifest *other; + PyObject *pyclean = NULL; + bool listclean; + PyObject *emptyTup = NULL, *ret = NULL; + PyObject *es; + int sneedle = 0, oneedle = 0; + if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) { + return NULL; + } + listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean); + es = PyBytes_FromString(""); + if (!es) { + goto nomem; + } + emptyTup = PyTuple_Pack(2, Py_None, es); + Py_DECREF(es); + if (!emptyTup) { + goto nomem; + } + ret = PyDict_New(); + if (!ret) { + goto nomem; + } + while (sneedle != self->numlines || oneedle != other->numlines) { + line *left = self->lines + sneedle; + line *right = other->lines + oneedle; + int result; + PyObject *key; + PyObject *outer; + /* If we're looking at a deleted entry and it's not + * the end of the manifest, just skip it. */ + if (left->deleted && sneedle < self->numlines) { + sneedle++; + continue; + } + if (right->deleted && oneedle < other->numlines) { + oneedle++; + continue; + } + /* if we're at the end of either manifest, then we + * know the remaining items are adds so we can skip + * the strcmp. */ + if (sneedle == self->numlines) { + result = 1; + } else if (oneedle == other->numlines) { + result = -1; + } else { + result = linecmp(left, right); + } + key = result <= 0 ? + PyBytes_FromString(left->start) : + PyBytes_FromString(right->start); + if (!key) + goto nomem; + if (result < 0) { + PyObject *l = hashflags(left); + if (!l) { + goto nomem; + } + outer = PyTuple_Pack(2, l, emptyTup); + Py_DECREF(l); + if (!outer) { + goto nomem; + } + PyDict_SetItem(ret, key, outer); + Py_DECREF(outer); + sneedle++; + } else if (result > 0) { + PyObject *r = hashflags(right); + if (!r) { + goto nomem; + } + outer = PyTuple_Pack(2, emptyTup, r); + Py_DECREF(r); + if (!outer) { + goto nomem; + } + PyDict_SetItem(ret, key, outer); + Py_DECREF(outer); + oneedle++; + } else { + /* file exists in both manifests */ + if (left->len != right->len + || memcmp(left->start, right->start, left->len) + || left->hash_suffix != right->hash_suffix) { + PyObject *l = hashflags(left); + PyObject *r; + if (!l) { + goto nomem; + } + r = hashflags(right); + if (!r) { + Py_DECREF(l); + goto nomem; + } + outer = PyTuple_Pack(2, l, r); + Py_DECREF(l); + Py_DECREF(r); + if (!outer) { + goto nomem; + } + PyDict_SetItem(ret, key, outer); + Py_DECREF(outer); + } else if (listclean) { + PyDict_SetItem(ret, key, Py_None); + } + sneedle++; + oneedle++; + } + Py_DECREF(key); + } + Py_DECREF(emptyTup); + return ret; +nomem: + PyErr_NoMemory(); + Py_XDECREF(ret); + Py_XDECREF(emptyTup); + return NULL; +} + +static PyMethodDef lazymanifest_methods[] = { + {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS, + "Iterate over file names in this lazymanifest."}, + {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS, + "Iterate over (path, nodeid, flags) tuples in this lazymanifest."}, + {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS, + "Make a copy of this lazymanifest."}, + {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O, + "Make a copy of this manifest filtered by matchfn."}, + {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS, + "Compare this lazymanifest to another one."}, + {"text", (PyCFunction)lazymanifest_text, METH_NOARGS, + "Encode this manifest to text."}, + {NULL}, +}; + +#ifdef IS_PY3K +#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT +#else +#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN +#endif + +static PyTypeObject lazymanifestType = { + PyVarObject_HEAD_INIT(NULL, 0) + "parsers.lazymanifest", /* tp_name */ + sizeof(lazymanifest), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)lazymanifest_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &lazymanifest_seq_meths, /* tp_as_sequence */ + &lazymanifest_mapping_methods, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + LAZYMANIFEST_TPFLAGS, /* tp_flags */ + "TODO(augie)", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */ + 0, /* tp_iternext */ + lazymanifest_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)lazymanifest_init, /* tp_init */ + 0, /* tp_alloc */ +}; + +void manifest_module_init(PyObject * mod) +{ + lazymanifestType.tp_new = PyType_GenericNew; + if (PyType_Ready(&lazymanifestType) < 0) + return; + Py_INCREF(&lazymanifestType); + + PyModule_AddObject(mod, "lazymanifest", + (PyObject *)&lazymanifestType); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/cext/parsers.c Sat Aug 13 12:23:56 2016 +0900 @@ -0,0 +1,2947 @@ +/* + parsers.c - efficient content parsing + + Copyright 2008 Matt Mackall <mpm@selenic.com> and others + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. +*/ + +#include <Python.h> +#include <ctype.h> +#include <stddef.h> +#include <string.h> + +#include "util.h" +#include "bitmanipulation.h" + +#ifdef IS_PY3K +/* The mapping of Python types is meant to be temporary to get Python + * 3 to compile. We should remove this once Python 3 support is fully + * supported and proper types are used in the extensions themselves. */ +#define PyInt_Type PyLong_Type +#define PyInt_Check PyLong_Check +#define PyInt_FromLong PyLong_FromLong +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AS_LONG PyLong_AS_LONG +#define PyInt_AsLong PyLong_AsLong +#endif + +static char *versionerrortext = "Python minor version mismatch"; + +static int8_t hextable[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */ + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +static char lowertable[128] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', + '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ + '\x78', '\x79', '\x7a', /* X-Z */ + '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' +}; + +static char uppertable[128] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', + '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ + '\x58', '\x59', '\x5a', /* x-z */ + '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' +}; + +static inline int hexdigit(const char *p, Py_ssize_t off) +{ + int8_t val = hextable[(unsigned char)p[off]]; + + if (val >= 0) { + return val; + } + + PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); + return 0; +} + +/* + * Turn a hex-encoded string into binary. + */ +PyObject *unhexlify(const char *str, int len) +{ + PyObject *ret; + char *d; + int i; + + ret = PyBytes_FromStringAndSize(NULL, len / 2); + + if (!ret) + return NULL; + + d = PyBytes_AsString(ret); + + for (i = 0; i < len;) { + int hi = hexdigit(str, i++); + int lo = hexdigit(str, i++); + *d++ = (hi << 4) | lo; + } + + return ret; +} + +static inline PyObject *_asciitransform(PyObject *str_obj, + const char table[128], + PyObject *fallback_fn) +{ + char *str, *newstr; + Py_ssize_t i, len; + PyObject *newobj = NULL; + PyObject *ret = NULL; + + str = PyBytes_AS_STRING(str_obj); + len = PyBytes_GET_SIZE(str_obj); + + newobj = PyBytes_FromStringAndSize(NULL, len); + if (!newobj) + goto quit; + + newstr = PyBytes_AS_STRING(newobj); + + for (i = 0; i < len; i++) { + char c = str[i]; + if (c & 0x80) { + if (fallback_fn != NULL) { + ret = PyObject_CallFunctionObjArgs(fallback_fn, + str_obj, NULL); + } else { + PyObject *err = PyUnicodeDecodeError_Create( + "ascii", str, len, i, (i + 1), + "unexpected code byte"); + PyErr_SetObject(PyExc_UnicodeDecodeError, err); + Py_XDECREF(err); + } + goto quit; + } + newstr[i] = table[(unsigned char)c]; + } + + ret = newobj; + Py_INCREF(ret); +quit: + Py_XDECREF(newobj); + return ret; +} + +static PyObject *asciilower(PyObject *self, PyObject *args) +{ + PyObject *str_obj; + if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) + return NULL; + return _asciitransform(str_obj, lowertable, NULL); +} + +static PyObject *asciiupper(PyObject *self, PyObject *args) +{ + PyObject *str_obj; + if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) + return NULL; + return _asciitransform(str_obj, uppertable, NULL); +} + +static inline PyObject *_dict_new_presized(Py_ssize_t expected_size) +{ + /* _PyDict_NewPresized expects a minused parameter, but it actually + creates a dictionary that's the nearest power of two bigger than the + parameter. For example, with the initial minused = 1000, the + dictionary created has size 1024. Of course in a lot of cases that + can be greater than the maximum load factor Python's dict object + expects (= 2/3), so as soon as we cross the threshold we'll resize + anyway. So create a dictionary that's at least 3/2 the size. */ + return _PyDict_NewPresized(((1 + expected_size) / 2) * 3); +} + +static PyObject *dict_new_presized(PyObject *self, PyObject *args) +{ + Py_ssize_t expected_size; + + if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) + return NULL; + + return _dict_new_presized(expected_size); +} + +static PyObject *make_file_foldmap(PyObject *self, PyObject *args) +{ + PyObject *dmap, *spec_obj, *normcase_fallback; + PyObject *file_foldmap = NULL; + enum normcase_spec spec; + PyObject *k, *v; + dirstateTupleObject *tuple; + Py_ssize_t pos = 0; + const char *table; + + if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", + &PyDict_Type, &dmap, + &PyInt_Type, &spec_obj, + &PyFunction_Type, &normcase_fallback)) + goto quit; + + spec = (int)PyInt_AS_LONG(spec_obj); + switch (spec) { + case NORMCASE_LOWER: + table = lowertable; + break; + case NORMCASE_UPPER: + table = uppertable; + break; + case NORMCASE_OTHER: + table = NULL; + break; + default: + PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); + goto quit; + } + + /* Add some more entries to deal with additions outside this + function. */ + file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); + if (file_foldmap == NULL) + goto quit; + + while (PyDict_Next(dmap, &pos, &k, &v)) { + if (!dirstate_tuple_check(v)) { + PyErr_SetString(PyExc_TypeError, + "expected a dirstate tuple"); + goto quit; + } + + tuple = (dirstateTupleObject *)v; + if (tuple->state != 'r') { + PyObject *normed; + if (table != NULL) { + normed = _asciitransform(k, table, + normcase_fallback); + } else { + normed = PyObject_CallFunctionObjArgs( + normcase_fallback, k, NULL); + } + + if (normed == NULL) + goto quit; + if (PyDict_SetItem(file_foldmap, normed, k) == -1) { + Py_DECREF(normed); + goto quit; + } + Py_DECREF(normed); + } + } + return file_foldmap; +quit: + Py_XDECREF(file_foldmap); + return NULL; +} + +/* + * This code assumes that a manifest is stitched together with newline + * ('\n') characters. + */ +static PyObject *parse_manifest(PyObject *self, PyObject *args) +{ + PyObject *mfdict, *fdict; + char *str, *start, *end; + int len; + + if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", + &PyDict_Type, &mfdict, + &PyDict_Type, &fdict, + &str, &len)) + goto quit; + + start = str; + end = str + len; + while (start < end) { + PyObject *file = NULL, *node = NULL; + PyObject *flags = NULL; + char *zero = NULL, *newline = NULL; + ptrdiff_t nlen; + + zero = memchr(start, '\0', end - start); + if (!zero) { + PyErr_SetString(PyExc_ValueError, + "manifest entry has no separator"); + goto quit; + } + + newline = memchr(zero + 1, '\n', end - (zero + 1)); + if (!newline) { + PyErr_SetString(PyExc_ValueError, + "manifest contains trailing garbage"); + goto quit; + } + + file = PyBytes_FromStringAndSize(start, zero - start); + + if (!file) + goto bail; + + nlen = newline - zero - 1; + + node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen); + if (!node) + goto bail; + + if (nlen > 40) { + flags = PyBytes_FromStringAndSize(zero + 41, + nlen - 40); + if (!flags) + goto bail; + + if (PyDict_SetItem(fdict, file, flags) == -1) + goto bail; + } + + if (PyDict_SetItem(mfdict, file, node) == -1) + goto bail; + + start = newline + 1; + + Py_XDECREF(flags); + Py_XDECREF(node); + Py_XDECREF(file); + continue; + bail: + Py_XDECREF(flags); + Py_XDECREF(node); + Py_XDECREF(file); + goto quit; + } + + Py_INCREF(Py_None); + return Py_None; +quit: + return NULL; +} + +static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode, + int size, int mtime) +{ + dirstateTupleObject *t = PyObject_New(dirstateTupleObject, + &dirstateTupleType); + if (!t) + return NULL; + t->state = state; + t->mode = mode; + t->size = size; + t->mtime = mtime; + return t; +} + +static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args, + PyObject *kwds) +{ + /* We do all the initialization here and not a tp_init function because + * dirstate_tuple is immutable. */ + dirstateTupleObject *t; + char state; + int size, mode, mtime; + if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) + return NULL; + + t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1); + if (!t) + return NULL; + t->state = state; + t->mode = mode; + t->size = size; + t->mtime = mtime; + + return (PyObject *)t; +} + +static void dirstate_tuple_dealloc(PyObject *o) +{ + PyObject_Del(o); +} + +static Py_ssize_t dirstate_tuple_length(PyObject *o) +{ + return 4; +} + +static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i) +{ + dirstateTupleObject *t = (dirstateTupleObject *)o; + switch (i) { + case 0: + return PyBytes_FromStringAndSize(&t->state, 1); + case 1: + return PyInt_FromLong(t->mode); + case 2: + return PyInt_FromLong(t->size); + case 3: + return PyInt_FromLong(t->mtime); + default: + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } +} + +static PySequenceMethods dirstate_tuple_sq = { + dirstate_tuple_length, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + dirstate_tuple_item, /* sq_item */ + 0, /* sq_ass_item */ + 0, /* sq_contains */ + 0, /* sq_inplace_concat */ + 0 /* sq_inplace_repeat */ +}; + +PyTypeObject dirstateTupleType = { + PyVarObject_HEAD_INIT(NULL, 0) + "dirstate_tuple", /* tp_name */ + sizeof(dirstateTupleObject),/* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)dirstate_tuple_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &dirstate_tuple_sq, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "dirstate tuple", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + dirstate_tuple_new, /* tp_new */ +}; + +static PyObject *parse_dirstate(PyObject *self, PyObject *args) +{ + PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; + PyObject *fname = NULL, *cname = NULL, *entry = NULL; + char state, *cur, *str, *cpos; + int mode, size, mtime; + unsigned int flen, len, pos = 40; + int readlen; + + if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", + &PyDict_Type, &dmap, + &PyDict_Type, &cmap, + &str, &readlen)) + goto quit; + + len = readlen; + + /* read parents */ + if (len < 40) { + PyErr_SetString( + PyExc_ValueError, "too little data for parents"); + goto quit; + } + + parents = Py_BuildValue("s#s#", str, 20, str + 20, 20); + if (!parents) + goto quit; + + /* read filenames */ + while (pos >= 40 && pos < len) { + if (pos + 17 > len) { + PyErr_SetString(PyExc_ValueError, + "overflow in dirstate"); + goto quit; + } + cur = str + pos; + /* unpack header */ + state = *cur; + mode = getbe32(cur + 1); + size = getbe32(cur + 5); + mtime = getbe32(cur + 9); + flen = getbe32(cur + 13); + pos += 17; + cur += 17; + if (flen > len - pos) { + PyErr_SetString(PyExc_ValueError, "overflow in dirstate"); + goto quit; + } + + entry = (PyObject *)make_dirstate_tuple(state, mode, size, + mtime); + cpos = memchr(cur, 0, flen); + if (cpos) { + fname = PyBytes_FromStringAndSize(cur, cpos - cur); + cname = PyBytes_FromStringAndSize(cpos + 1, + flen - (cpos - cur) - 1); + if (!fname || !cname || + PyDict_SetItem(cmap, fname, cname) == -1 || + PyDict_SetItem(dmap, fname, entry) == -1) + goto quit; + Py_DECREF(cname); + } else { + fname = PyBytes_FromStringAndSize(cur, flen); + if (!fname || + PyDict_SetItem(dmap, fname, entry) == -1) + goto quit; + } + Py_DECREF(fname); + Py_DECREF(entry); + fname = cname = entry = NULL; + pos += flen; + } + + ret = parents; + Py_INCREF(ret); +quit: + Py_XDECREF(fname); + Py_XDECREF(cname); + Py_XDECREF(entry); + Py_XDECREF(parents); + return ret; +} + +/* + * Build a set of non-normal and other parent entries from the dirstate dmap +*/ +static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) { + PyObject *dmap, *fname, *v; + PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL; + Py_ssize_t pos; + + if (!PyArg_ParseTuple(args, "O!:nonnormalentries", + &PyDict_Type, &dmap)) + goto bail; + + nonnset = PySet_New(NULL); + if (nonnset == NULL) + goto bail; + + otherpset = PySet_New(NULL); + if (otherpset == NULL) + goto bail; + + pos = 0; + while (PyDict_Next(dmap, &pos, &fname, &v)) { + dirstateTupleObject *t; + if (!dirstate_tuple_check(v)) { + PyErr_SetString(PyExc_TypeError, + "expected a dirstate tuple"); + goto bail; + } + t = (dirstateTupleObject *)v; + + if (t->state == 'n' && t->size == -2) { + if (PySet_Add(otherpset, fname) == -1) { + goto bail; + } + } + + if (t->state == 'n' && t->mtime != -1) + continue; + if (PySet_Add(nonnset, fname) == -1) + goto bail; + } + + result = Py_BuildValue("(OO)", nonnset, otherpset); + if (result == NULL) + goto bail; + Py_DECREF(nonnset); + Py_DECREF(otherpset); + return result; +bail: + Py_XDECREF(nonnset); + Py_XDECREF(otherpset); + Py_XDECREF(result); + return NULL; +} + +/* + * Efficiently pack a dirstate object into its on-disk format. + */ +static PyObject *pack_dirstate(PyObject *self, PyObject *args) +{ + PyObject *packobj = NULL; + PyObject *map, *copymap, *pl, *mtime_unset = NULL; + Py_ssize_t nbytes, pos, l; + PyObject *k, *v = NULL, *pn; + char *p, *s; + int now; + + if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", + &PyDict_Type, &map, &PyDict_Type, ©map, + &pl, &now)) + return NULL; + + if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) { + PyErr_SetString(PyExc_TypeError, "expected 2-element sequence"); + return NULL; + } + + /* Figure out how much we need to allocate. */ + for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) { + PyObject *c; + if (!PyBytes_Check(k)) { + PyErr_SetString(PyExc_TypeError, "expected string key"); + goto bail; + } + nbytes += PyBytes_GET_SIZE(k) + 17; + c = PyDict_GetItem(copymap, k); + if (c) { + if (!PyBytes_Check(c)) { + PyErr_SetString(PyExc_TypeError, + "expected string key"); + goto bail; + } + nbytes += PyBytes_GET_SIZE(c) + 1; + } + } + + packobj = PyBytes_FromStringAndSize(NULL, nbytes); + if (packobj == NULL) + goto bail; + + p = PyBytes_AS_STRING(packobj); + + pn = PySequence_ITEM(pl, 0); + if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { + PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); + goto bail; + } + memcpy(p, s, l); + p += 20; + pn = PySequence_ITEM(pl, 1); + if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { + PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); + goto bail; + } + memcpy(p, s, l); + p += 20; + + for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) { + dirstateTupleObject *tuple; + char state; + int mode, size, mtime; + Py_ssize_t len, l; + PyObject *o; + char *t; + + if (!dirstate_tuple_check(v)) { + PyErr_SetString(PyExc_TypeError, + "expected a dirstate tuple"); + goto bail; + } + tuple = (dirstateTupleObject *)v; + + state = tuple->state; + mode = tuple->mode; + size = tuple->size; + mtime = tuple->mtime; + if (state == 'n' && mtime == now) { + /* See pure/parsers.py:pack_dirstate for why we do + * this. */ + mtime = -1; + mtime_unset = (PyObject *)make_dirstate_tuple( + state, mode, size, mtime); + if (!mtime_unset) + goto bail; + if (PyDict_SetItem(map, k, mtime_unset) == -1) + goto bail; + Py_DECREF(mtime_unset); + mtime_unset = NULL; + } + *p++ = state; + putbe32((uint32_t)mode, p); + putbe32((uint32_t)size, p + 4); + putbe32((uint32_t)mtime, p + 8); + t = p + 12; + p += 16; + len = PyBytes_GET_SIZE(k); + memcpy(p, PyBytes_AS_STRING(k), len); + p += len; + o = PyDict_GetItem(copymap, k); + if (o) { + *p++ = '\0'; + l = PyBytes_GET_SIZE(o); + memcpy(p, PyBytes_AS_STRING(o), l); + p += l; + len += l + 1; + } + putbe32((uint32_t)len, t); + } + + pos = p - PyBytes_AS_STRING(packobj); + if (pos != nbytes) { + PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld", + (long)pos, (long)nbytes); + goto bail; + } + + return packobj; +bail: + Py_XDECREF(mtime_unset); + Py_XDECREF(packobj); + Py_XDECREF(v); + return NULL; +} + +/* + * A base-16 trie for fast node->rev mapping. + * + * Positive value is index of the next node in the trie + * Negative value is a leaf: -(rev + 1) + * Zero is empty + */ +typedef struct { + int children[16]; +} nodetree; + +/* + * This class has two behaviors. + * + * When used in a list-like way (with integer keys), we decode an + * entry in a RevlogNG index file on demand. Our last entry is a + * sentinel, always a nullid. We have limited support for + * integer-keyed insert and delete, only at elements right before the + * sentinel. + * + * With string keys, we lazily perform a reverse mapping from node to + * rev, using a base-16 trie. + */ +typedef struct { + PyObject_HEAD + /* Type-specific fields go here. */ + PyObject *data; /* raw bytes of index */ + Py_buffer buf; /* buffer of data */ + PyObject **cache; /* cached tuples */ + const char **offsets; /* populated on demand */ + Py_ssize_t raw_length; /* original number of elements */ + Py_ssize_t length; /* current number of elements */ + PyObject *added; /* populated on demand */ + PyObject *headrevs; /* cache, invalidated on changes */ + PyObject *filteredrevs;/* filtered revs set */ + nodetree *nt; /* base-16 trie */ + unsigned ntlength; /* # nodes in use */ + unsigned ntcapacity; /* # nodes allocated */ + int ntdepth; /* maximum depth of tree */ + int ntsplits; /* # splits performed */ + int ntrev; /* last rev scanned */ + int ntlookups; /* # lookups */ + int ntmisses; /* # lookups that miss the cache */ + int inlined; +} indexObject; + +static Py_ssize_t index_length(const indexObject *self) +{ + if (self->added == NULL) + return self->length; + return self->length + PyList_GET_SIZE(self->added); +} + +static PyObject *nullentry; +static const char nullid[20]; + +static Py_ssize_t inline_scan(indexObject *self, const char **offsets); + +#if LONG_MAX == 0x7fffffffL +static char *tuple_format = "Kiiiiiis#"; +#else +static char *tuple_format = "kiiiiiis#"; +#endif + +/* A RevlogNG v1 index entry is 64 bytes long. */ +static const long v1_hdrsize = 64; + +/* + * Return a pointer to the beginning of a RevlogNG record. + */ +static const char *index_deref(indexObject *self, Py_ssize_t pos) +{ + if (self->inlined && pos > 0) { + if (self->offsets == NULL) { + self->offsets = PyMem_Malloc(self->raw_length * + sizeof(*self->offsets)); + if (self->offsets == NULL) + return (const char *)PyErr_NoMemory(); + inline_scan(self, self->offsets); + } + return self->offsets[pos]; + } + + return (const char *)(self->buf.buf) + pos * v1_hdrsize; +} + +static inline int index_get_parents(indexObject *self, Py_ssize_t rev, + int *ps, int maxrev) +{ + if (rev >= self->length - 1) { + PyObject *tuple = PyList_GET_ITEM(self->added, + rev - self->length + 1); + ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5)); + ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6)); + } else { + const char *data = index_deref(self, rev); + ps[0] = getbe32(data + 24); + ps[1] = getbe32(data + 28); + } + /* If index file is corrupted, ps[] may point to invalid revisions. So + * there is a risk of buffer overflow to trust them unconditionally. */ + if (ps[0] > maxrev || ps[1] > maxrev) { + PyErr_SetString(PyExc_ValueError, "parent out of range"); + return -1; + } + return 0; +} + + +/* + * RevlogNG format (all in big endian, data may be inlined): + * 6 bytes: offset + * 2 bytes: flags + * 4 bytes: compressed length + * 4 bytes: uncompressed length + * 4 bytes: base revision + * 4 bytes: link revision + * 4 bytes: parent 1 revision + * 4 bytes: parent 2 revision + * 32 bytes: nodeid (only 20 bytes used) + */ +static PyObject *index_get(indexObject *self, Py_ssize_t pos) +{ + uint64_t offset_flags; + int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + const char *c_node_id; + const char *data; + Py_ssize_t length = index_length(self); + PyObject *entry; + + if (pos < 0) + pos += length; + + if (pos < 0 || pos >= length) { + PyErr_SetString(PyExc_IndexError, "revlog index out of range"); + return NULL; + } + + if (pos == length - 1) { + Py_INCREF(nullentry); + return nullentry; + } + + if (pos >= self->length - 1) { + PyObject *obj; + obj = PyList_GET_ITEM(self->added, pos - self->length + 1); + Py_INCREF(obj); + return obj; + } + + if (self->cache) { + if (self->cache[pos]) { + Py_INCREF(self->cache[pos]); + return self->cache[pos]; + } + } else { + self->cache = calloc(self->raw_length, sizeof(PyObject *)); + if (self->cache == NULL) + return PyErr_NoMemory(); + } + + data = index_deref(self, pos); + if (data == NULL) + return NULL; + + offset_flags = getbe32(data + 4); + if (pos == 0) /* mask out version number for the first entry */ + offset_flags &= 0xFFFF; + else { + uint32_t offset_high = getbe32(data); + offset_flags |= ((uint64_t)offset_high) << 32; + } + + comp_len = getbe32(data + 8); + uncomp_len = getbe32(data + 12); + base_rev = getbe32(data + 16); + link_rev = getbe32(data + 20); + parent_1 = getbe32(data + 24); + parent_2 = getbe32(data + 28); + c_node_id = data + 32; + + entry = Py_BuildValue(tuple_format, offset_flags, comp_len, + uncomp_len, base_rev, link_rev, + parent_1, parent_2, c_node_id, 20); + + if (entry) { + PyObject_GC_UnTrack(entry); + Py_INCREF(entry); + } + + self->cache[pos] = entry; + + return entry; +} + +/* + * Return the 20-byte SHA of the node corresponding to the given rev. + */ +static const char *index_node(indexObject *self, Py_ssize_t pos) +{ + Py_ssize_t length = index_length(self); + const char *data; + + if (pos == length - 1 || pos == INT_MAX) + return nullid; + + if (pos >= length) + return NULL; + + if (pos >= self->length - 1) { + PyObject *tuple, *str; + tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); + str = PyTuple_GetItem(tuple, 7); + return str ? PyBytes_AS_STRING(str) : NULL; + } + + data = index_deref(self, pos); + return data ? data + 32 : NULL; +} + +static int nt_insert(indexObject *self, const char *node, int rev); + +static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen) +{ + if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1) + return -1; + if (*nodelen == 20) + return 0; + PyErr_SetString(PyExc_ValueError, "20-byte hash required"); + return -1; +} + +static PyObject *index_insert(indexObject *self, PyObject *args) +{ + PyObject *obj; + char *node; + int index; + Py_ssize_t len, nodelen; + + if (!PyArg_ParseTuple(args, "iO", &index, &obj)) + return NULL; + + if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { + PyErr_SetString(PyExc_TypeError, "8-tuple required"); + return NULL; + } + + if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1) + return NULL; + + len = index_length(self); + + if (index < 0) + index += len; + + if (index != len - 1) { + PyErr_SetString(PyExc_IndexError, + "insert only supported at index -1"); + return NULL; + } + + if (self->added == NULL) { + self->added = PyList_New(0); + if (self->added == NULL) + return NULL; + } + + if (PyList_Append(self->added, obj) == -1) + return NULL; + + if (self->nt) + nt_insert(self, node, index); + + Py_CLEAR(self->headrevs); + Py_RETURN_NONE; +} + +static void _index_clearcaches(indexObject *self) +{ + if (self->cache) { + Py_ssize_t i; + + for (i = 0; i < self->raw_length; i++) + Py_CLEAR(self->cache[i]); + free(self->cache); + self->cache = NULL; + } + if (self->offsets) { + PyMem_Free(self->offsets); + self->offsets = NULL; + } + if (self->nt) { + free(self->nt); + self->nt = NULL; + } + Py_CLEAR(self->headrevs); +} + +static PyObject *index_clearcaches(indexObject *self) +{ + _index_clearcaches(self); + self->ntlength = self->ntcapacity = 0; + self->ntdepth = self->ntsplits = 0; + self->ntrev = -1; + self->ntlookups = self->ntmisses = 0; + Py_RETURN_NONE; +} + +static PyObject *index_stats(indexObject *self) +{ + PyObject *obj = PyDict_New(); + PyObject *t = NULL; + + if (obj == NULL) + return NULL; + +#define istat(__n, __d) \ + do { \ + t = PyInt_FromSsize_t(self->__n); \ + if (!t) \ + goto bail; \ + if (PyDict_SetItemString(obj, __d, t) == -1) \ + goto bail; \ + Py_DECREF(t); \ + } while (0) + + if (self->added) { + Py_ssize_t len = PyList_GET_SIZE(self->added); + t = PyInt_FromSsize_t(len); + if (!t) + goto bail; + if (PyDict_SetItemString(obj, "index entries added", t) == -1) + goto bail; + Py_DECREF(t); + } + + if (self->raw_length != self->length - 1) + istat(raw_length, "revs on disk"); + istat(length, "revs in memory"); + istat(ntcapacity, "node trie capacity"); + istat(ntdepth, "node trie depth"); + istat(ntlength, "node trie count"); + istat(ntlookups, "node trie lookups"); + istat(ntmisses, "node trie misses"); + istat(ntrev, "node trie last rev scanned"); + istat(ntsplits, "node trie splits"); + +#undef istat + + return obj; + +bail: + Py_XDECREF(obj); + Py_XDECREF(t); + return NULL; +} + +/* + * When we cache a list, we want to be sure the caller can't mutate + * the cached copy. + */ +static PyObject *list_copy(PyObject *list) +{ + Py_ssize_t len = PyList_GET_SIZE(list); + PyObject *newlist = PyList_New(len); + Py_ssize_t i; + + if (newlist == NULL) + return NULL; + + for (i = 0; i < len; i++) { + PyObject *obj = PyList_GET_ITEM(list, i); + Py_INCREF(obj); + PyList_SET_ITEM(newlist, i, obj); + } + + return newlist; +} + +static int check_filter(PyObject *filter, Py_ssize_t arg) { + if (filter) { + PyObject *arglist, *result; + int isfiltered; + + arglist = Py_BuildValue("(n)", arg); + if (!arglist) { + return -1; + } + + result = PyEval_CallObject(filter, arglist); + Py_DECREF(arglist); + if (!result) { + return -1; + } + + /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error, + * same as this function, so we can just return it directly.*/ + isfiltered = PyObject_IsTrue(result); + Py_DECREF(result); + return isfiltered; + } else { + return 0; + } +} + +static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list, + Py_ssize_t marker, char *phases) +{ + PyObject *iter = NULL; + PyObject *iter_item = NULL; + Py_ssize_t min_idx = index_length(self) + 1; + long iter_item_long; + + if (PyList_GET_SIZE(list) != 0) { + iter = PyObject_GetIter(list); + if (iter == NULL) + return -2; + while ((iter_item = PyIter_Next(iter))) + { + iter_item_long = PyInt_AS_LONG(iter_item); + Py_DECREF(iter_item); + if (iter_item_long < min_idx) + min_idx = iter_item_long; + phases[iter_item_long] = marker; + } + Py_DECREF(iter); + } + + return min_idx; +} + +static inline void set_phase_from_parents(char *phases, int parent_1, + int parent_2, Py_ssize_t i) +{ + if (parent_1 >= 0 && phases[parent_1] > phases[i]) + phases[i] = phases[parent_1]; + if (parent_2 >= 0 && phases[parent_2] > phases[i]) + phases[i] = phases[parent_2]; +} + +static PyObject *reachableroots2(indexObject *self, PyObject *args) +{ + + /* Input */ + long minroot; + PyObject *includepatharg = NULL; + int includepath = 0; + /* heads and roots are lists */ + PyObject *heads = NULL; + PyObject *roots = NULL; + PyObject *reachable = NULL; + + PyObject *val; + Py_ssize_t len = index_length(self) - 1; + long revnum; + Py_ssize_t k; + Py_ssize_t i; + Py_ssize_t l; + int r; + int parents[2]; + + /* Internal data structure: + * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit + * revstates: array of length len+1 (all revs + nullrev) */ + int *tovisit = NULL; + long lentovisit = 0; + enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 }; + char *revstates = NULL; + + /* Get arguments */ + if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads, + &PyList_Type, &roots, + &PyBool_Type, &includepatharg)) + goto bail; + + if (includepatharg == Py_True) + includepath = 1; + + /* Initialize return set */ + reachable = PyList_New(0); + if (reachable == NULL) + goto bail; + + /* Initialize internal datastructures */ + tovisit = (int *)malloc((len + 1) * sizeof(int)); + if (tovisit == NULL) { + PyErr_NoMemory(); + goto bail; + } + + revstates = (char *)calloc(len + 1, 1); + if (revstates == NULL) { + PyErr_NoMemory(); + goto bail; + } + + l = PyList_GET_SIZE(roots); + for (i = 0; i < l; i++) { + revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i)); + if (revnum == -1 && PyErr_Occurred()) + goto bail; + /* If root is out of range, e.g. wdir(), it must be unreachable + * from heads. So we can just ignore it. */ + if (revnum + 1 < 0 || revnum + 1 >= len + 1) + continue; + revstates[revnum + 1] |= RS_ROOT; + } + + /* Populate tovisit with all the heads */ + l = PyList_GET_SIZE(heads); + for (i = 0; i < l; i++) { + revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i)); + if (revnum == -1 && PyErr_Occurred()) + goto bail; + if (revnum + 1 < 0 || revnum + 1 >= len + 1) { + PyErr_SetString(PyExc_IndexError, "head out of range"); + goto bail; + } + if (!(revstates[revnum + 1] & RS_SEEN)) { + tovisit[lentovisit++] = (int)revnum; + revstates[revnum + 1] |= RS_SEEN; + } + } + + /* Visit the tovisit list and find the reachable roots */ + k = 0; + while (k < lentovisit) { + /* Add the node to reachable if it is a root*/ + revnum = tovisit[k++]; + if (revstates[revnum + 1] & RS_ROOT) { + revstates[revnum + 1] |= RS_REACHABLE; + val = PyInt_FromLong(revnum); + if (val == NULL) + goto bail; + r = PyList_Append(reachable, val); + Py_DECREF(val); + if (r < 0) + goto bail; + if (includepath == 0) + continue; + } + + /* Add its parents to the list of nodes to visit */ + if (revnum == -1) + continue; + r = index_get_parents(self, revnum, parents, (int)len - 1); + if (r < 0) + goto bail; + for (i = 0; i < 2; i++) { + if (!(revstates[parents[i] + 1] & RS_SEEN) + && parents[i] >= minroot) { + tovisit[lentovisit++] = parents[i]; + revstates[parents[i] + 1] |= RS_SEEN; + } + } + } + + /* Find all the nodes in between the roots we found and the heads + * and add them to the reachable set */ + if (includepath == 1) { + long minidx = minroot; + if (minidx < 0) + minidx = 0; + for (i = minidx; i < len; i++) { + if (!(revstates[i + 1] & RS_SEEN)) + continue; + r = index_get_parents(self, i, parents, (int)len - 1); + /* Corrupted index file, error is set from + * index_get_parents */ + if (r < 0) + goto bail; + if (((revstates[parents[0] + 1] | + revstates[parents[1] + 1]) & RS_REACHABLE) + && !(revstates[i + 1] & RS_REACHABLE)) { + revstates[i + 1] |= RS_REACHABLE; + val = PyInt_FromLong(i); + if (val == NULL) + goto bail; + r = PyList_Append(reachable, val); + Py_DECREF(val); + if (r < 0) + goto bail; + } + } + } + + free(revstates); + free(tovisit); + return reachable; +bail: + Py_XDECREF(reachable); + free(revstates); + free(tovisit); + return NULL; +} + +static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args) +{ + PyObject *roots = Py_None; + PyObject *ret = NULL; + PyObject *phaseslist = NULL; + PyObject *phaseroots = NULL; + PyObject *phaseset = NULL; + PyObject *phasessetlist = NULL; + PyObject *rev = NULL; + Py_ssize_t len = index_length(self) - 1; + Py_ssize_t numphase = 0; + Py_ssize_t minrevallphases = 0; + Py_ssize_t minrevphase = 0; + Py_ssize_t i = 0; + char *phases = NULL; + long phase; + + if (!PyArg_ParseTuple(args, "O", &roots)) + goto done; + if (roots == NULL || !PyList_Check(roots)) + goto done; + + phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */ + if (phases == NULL) { + PyErr_NoMemory(); + goto done; + } + /* Put the phase information of all the roots in phases */ + numphase = PyList_GET_SIZE(roots)+1; + minrevallphases = len + 1; + phasessetlist = PyList_New(numphase); + if (phasessetlist == NULL) + goto done; + + PyList_SET_ITEM(phasessetlist, 0, Py_None); + Py_INCREF(Py_None); + + for (i = 0; i < numphase-1; i++) { + phaseroots = PyList_GET_ITEM(roots, i); + phaseset = PySet_New(NULL); + if (phaseset == NULL) + goto release; + PyList_SET_ITEM(phasessetlist, i+1, phaseset); + if (!PyList_Check(phaseroots)) + goto release; + minrevphase = add_roots_get_min(self, phaseroots, i+1, phases); + if (minrevphase == -2) /* Error from add_roots_get_min */ + goto release; + minrevallphases = MIN(minrevallphases, minrevphase); + } + /* Propagate the phase information from the roots to the revs */ + if (minrevallphases != -1) { + int parents[2]; + for (i = minrevallphases; i < len; i++) { + if (index_get_parents(self, i, parents, + (int)len - 1) < 0) + goto release; + set_phase_from_parents(phases, parents[0], parents[1], i); + } + } + /* Transform phase list to a python list */ + phaseslist = PyList_New(len); + if (phaseslist == NULL) + goto release; + for (i = 0; i < len; i++) { + PyObject *phaseval; + + phase = phases[i]; + /* We only store the sets of phase for non public phase, the public phase + * is computed as a difference */ + if (phase != 0) { + phaseset = PyList_GET_ITEM(phasessetlist, phase); + rev = PyInt_FromLong(i); + if (rev == NULL) + goto release; + PySet_Add(phaseset, rev); + Py_XDECREF(rev); + } + phaseval = PyInt_FromLong(phase); + if (phaseval == NULL) + goto release; + PyList_SET_ITEM(phaseslist, i, phaseval); + } + ret = PyTuple_Pack(2, phaseslist, phasessetlist); + +release: + Py_XDECREF(phaseslist); + Py_XDECREF(phasessetlist); +done: + free(phases); + return ret; +} + +static PyObject *index_headrevs(indexObject *self, PyObject *args) +{ + Py_ssize_t i, j, len; + char *nothead = NULL; + PyObject *heads = NULL; + PyObject *filter = NULL; + PyObject *filteredrevs = Py_None; + + if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) { + return NULL; + } + + if (self->headrevs && filteredrevs == self->filteredrevs) + return list_copy(self->headrevs); + + Py_DECREF(self->filteredrevs); + self->filteredrevs = filteredrevs; + Py_INCREF(filteredrevs); + + if (filteredrevs != Py_None) { + filter = PyObject_GetAttrString(filteredrevs, "__contains__"); + if (!filter) { + PyErr_SetString(PyExc_TypeError, + "filteredrevs has no attribute __contains__"); + goto bail; + } + } + + len = index_length(self) - 1; + heads = PyList_New(0); + if (heads == NULL) + goto bail; + if (len == 0) { + PyObject *nullid = PyInt_FromLong(-1); + if (nullid == NULL || PyList_Append(heads, nullid) == -1) { + Py_XDECREF(nullid); + goto bail; + } + goto done; + } + + nothead = calloc(len, 1); + if (nothead == NULL) { + PyErr_NoMemory(); + goto bail; + } + + for (i = len - 1; i >= 0; i--) { + int isfiltered; + int parents[2]; + + /* If nothead[i] == 1, it means we've seen an unfiltered child of this + * node already, and therefore this node is not filtered. So we can skip + * the expensive check_filter step. + */ + if (nothead[i] != 1) { + isfiltered = check_filter(filter, i); + if (isfiltered == -1) { + PyErr_SetString(PyExc_TypeError, + "unable to check filter"); + goto bail; + } + + if (isfiltered) { + nothead[i] = 1; + continue; + } + } + + if (index_get_parents(self, i, parents, (int)len - 1) < 0) + goto bail; + for (j = 0; j < 2; j++) { + if (parents[j] >= 0) + nothead[parents[j]] = 1; + } + } + + for (i = 0; i < len; i++) { + PyObject *head; + + if (nothead[i]) + continue; + head = PyInt_FromSsize_t(i); + if (head == NULL || PyList_Append(heads, head) == -1) { + Py_XDECREF(head); + goto bail; + } + } + +done: + self->headrevs = heads; + Py_XDECREF(filter); + free(nothead); + return list_copy(self->headrevs); +bail: + Py_XDECREF(filter); + Py_XDECREF(heads); + free(nothead); + return NULL; +} + +static inline int nt_level(const char *node, Py_ssize_t level) +{ + int v = node[level>>1]; + if (!(level & 1)) + v >>= 4; + return v & 0xf; +} + +/* + * Return values: + * + * -4: match is ambiguous (multiple candidates) + * -2: not found + * rest: valid rev + */ +static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen, + int hex) +{ + int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level; + int level, maxlevel, off; + + if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0) + return -1; + + if (self->nt == NULL) + return -2; + + if (hex) + maxlevel = nodelen > 40 ? 40 : (int)nodelen; + else + maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2); + + for (level = off = 0; level < maxlevel; level++) { + int k = getnybble(node, level); + nodetree *n = &self->nt[off]; + int v = n->children[k]; + + if (v < 0) { + const char *n; + Py_ssize_t i; + + v = -(v + 1); + n = index_node(self, v); + if (n == NULL) + return -2; + for (i = level; i < maxlevel; i++) + if (getnybble(node, i) != nt_level(n, i)) + return -2; + return v; + } + if (v == 0) + return -2; + off = v; + } + /* multiple matches against an ambiguous prefix */ + return -4; +} + +static int nt_new(indexObject *self) +{ + if (self->ntlength == self->ntcapacity) { + if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) { + PyErr_SetString(PyExc_MemoryError, + "overflow in nt_new"); + return -1; + } + self->ntcapacity *= 2; + self->nt = realloc(self->nt, + self->ntcapacity * sizeof(nodetree)); + if (self->nt == NULL) { + PyErr_SetString(PyExc_MemoryError, "out of memory"); + return -1; + } + memset(&self->nt[self->ntlength], 0, + sizeof(nodetree) * (self->ntcapacity - self->ntlength)); + } + return self->ntlength++; +} + +static int nt_insert(indexObject *self, const char *node, int rev) +{ + int level = 0; + int off = 0; + + while (level < 40) { + int k = nt_level(node, level); + nodetree *n; + int v; + + n = &self->nt[off]; + v = n->children[k]; + + if (v == 0) { + n->children[k] = -rev - 1; + return 0; + } + if (v < 0) { + const char *oldnode = index_node(self, -(v + 1)); + int noff; + + if (!oldnode || !memcmp(oldnode, node, 20)) { + n->children[k] = -rev - 1; + return 0; + } + noff = nt_new(self); + if (noff == -1) + return -1; + /* self->nt may have been changed by realloc */ + self->nt[off].children[k] = noff; + off = noff; + n = &self->nt[off]; + n->children[nt_level(oldnode, ++level)] = v; + if (level > self->ntdepth) + self->ntdepth = level; + self->ntsplits += 1; + } else { + level += 1; + off = v; + } + } + + return -1; +} + +static int nt_init(indexObject *self) +{ + if (self->nt == NULL) { + if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) { + PyErr_SetString(PyExc_ValueError, "overflow in nt_init"); + return -1; + } + self->ntcapacity = self->raw_length < 4 + ? 4 : (int)self->raw_length / 2; + + self->nt = calloc(self->ntcapacity, sizeof(nodetree)); + if (self->nt == NULL) { + PyErr_NoMemory(); + return -1; + } + self->ntlength = 1; + self->ntrev = (int)index_length(self) - 1; + self->ntlookups = 1; + self->ntmisses = 0; + if (nt_insert(self, nullid, INT_MAX) == -1) + return -1; + } + return 0; +} + +/* + * Return values: + * + * -3: error (exception set) + * -2: not found (no exception set) + * rest: valid rev + */ +static int index_find_node(indexObject *self, + const char *node, Py_ssize_t nodelen) +{ + int rev; + + self->ntlookups++; + rev = nt_find(self, node, nodelen, 0); + if (rev >= -1) + return rev; + + if (nt_init(self) == -1) + return -3; + + /* + * For the first handful of lookups, we scan the entire index, + * and cache only the matching nodes. This optimizes for cases + * like "hg tip", where only a few nodes are accessed. + * + * After that, we cache every node we visit, using a single + * scan amortized over multiple lookups. This gives the best + * bulk performance, e.g. for "hg log". + */ + if (self->ntmisses++ < 4) { + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) + return -2; + if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { + if (nt_insert(self, n, rev) == -1) + return -3; + break; + } + } + } else { + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) { + self->ntrev = rev + 1; + return -2; + } + if (nt_insert(self, n, rev) == -1) { + self->ntrev = rev + 1; + return -3; + } + if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { + break; + } + } + self->ntrev = rev; + } + + if (rev >= 0) + return rev; + return -2; +} + +static void raise_revlog_error(void) +{ + PyObject *mod = NULL, *dict = NULL, *errclass = NULL; + + mod = PyImport_ImportModule("mercurial.error"); + if (mod == NULL) { + goto cleanup; + } + + dict = PyModule_GetDict(mod); + if (dict == NULL) { + goto cleanup; + } + Py_INCREF(dict); + + errclass = PyDict_GetItemString(dict, "RevlogError"); + if (errclass == NULL) { + PyErr_SetString(PyExc_SystemError, + "could not find RevlogError"); + goto cleanup; + } + + /* value of exception is ignored by callers */ + PyErr_SetString(errclass, "RevlogError"); + +cleanup: + Py_XDECREF(dict); + Py_XDECREF(mod); +} + +static PyObject *index_getitem(indexObject *self, PyObject *value) +{ + char *node; + Py_ssize_t nodelen; + int rev; + + if (PyInt_Check(value)) + return index_get(self, PyInt_AS_LONG(value)); + + if (node_check(value, &node, &nodelen) == -1) + return NULL; + rev = index_find_node(self, node, nodelen); + if (rev >= -1) + return PyInt_FromLong(rev); + if (rev == -2) + raise_revlog_error(); + return NULL; +} + +static int nt_partialmatch(indexObject *self, const char *node, + Py_ssize_t nodelen) +{ + int rev; + + if (nt_init(self) == -1) + return -3; + + if (self->ntrev > 0) { + /* ensure that the radix tree is fully populated */ + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) + return -2; + if (nt_insert(self, n, rev) == -1) + return -3; + } + self->ntrev = rev; + } + + return nt_find(self, node, nodelen, 1); +} + +static PyObject *index_partialmatch(indexObject *self, PyObject *args) +{ + const char *fullnode; + int nodelen; + char *node; + int rev, i; + + if (!PyArg_ParseTuple(args, "s#", &node, &nodelen)) + return NULL; + + if (nodelen < 4) { + PyErr_SetString(PyExc_ValueError, "key too short"); + return NULL; + } + + if (nodelen > 40) { + PyErr_SetString(PyExc_ValueError, "key too long"); + return NULL; + } + + for (i = 0; i < nodelen; i++) + hexdigit(node, i); + if (PyErr_Occurred()) { + /* input contains non-hex characters */ + PyErr_Clear(); + Py_RETURN_NONE; + } + + rev = nt_partialmatch(self, node, nodelen); + + switch (rev) { + case -4: + raise_revlog_error(); + case -3: + return NULL; + case -2: + Py_RETURN_NONE; + case -1: + return PyBytes_FromStringAndSize(nullid, 20); + } + + fullnode = index_node(self, rev); + if (fullnode == NULL) { + PyErr_Format(PyExc_IndexError, + "could not access rev %d", rev); + return NULL; + } + return PyBytes_FromStringAndSize(fullnode, 20); +} + +static PyObject *index_m_get(indexObject *self, PyObject *args) +{ + Py_ssize_t nodelen; + PyObject *val; + char *node; + int rev; + + if (!PyArg_ParseTuple(args, "O", &val)) + return NULL; + if (node_check(val, &node, &nodelen) == -1) + return NULL; + rev = index_find_node(self, node, nodelen); + if (rev == -3) + return NULL; + if (rev == -2) + Py_RETURN_NONE; + return PyInt_FromLong(rev); +} + +static int index_contains(indexObject *self, PyObject *value) +{ + char *node; + Py_ssize_t nodelen; + + if (PyInt_Check(value)) { + long rev = PyInt_AS_LONG(value); + return rev >= -1 && rev < index_length(self); + } + + if (node_check(value, &node, &nodelen) == -1) + return -1; + + switch (index_find_node(self, node, nodelen)) { + case -3: + return -1; + case -2: + return 0; + default: + return 1; + } +} + +typedef uint64_t bitmask; + +/* + * Given a disjoint set of revs, return all candidates for the + * greatest common ancestor. In revset notation, this is the set + * "heads(::a and ::b and ...)" + */ +static PyObject *find_gca_candidates(indexObject *self, const int *revs, + int revcount) +{ + const bitmask allseen = (1ull << revcount) - 1; + const bitmask poison = 1ull << revcount; + PyObject *gca = PyList_New(0); + int i, v, interesting; + int maxrev = -1; + bitmask sp; + bitmask *seen; + + if (gca == NULL) + return PyErr_NoMemory(); + + for (i = 0; i < revcount; i++) { + if (revs[i] > maxrev) + maxrev = revs[i]; + } + + seen = calloc(sizeof(*seen), maxrev + 1); + if (seen == NULL) { + Py_DECREF(gca); + return PyErr_NoMemory(); + } + + for (i = 0; i < revcount; i++) + seen[revs[i]] = 1ull << i; + + interesting = revcount; + + for (v = maxrev; v >= 0 && interesting; v--) { + bitmask sv = seen[v]; + int parents[2]; + + if (!sv) + continue; + + if (sv < poison) { + interesting -= 1; + if (sv == allseen) { + PyObject *obj = PyInt_FromLong(v); + if (obj == NULL) + goto bail; + if (PyList_Append(gca, obj) == -1) { + Py_DECREF(obj); + goto bail; + } + sv |= poison; + for (i = 0; i < revcount; i++) { + if (revs[i] == v) + goto done; + } + } + } + if (index_get_parents(self, v, parents, maxrev) < 0) + goto bail; + + for (i = 0; i < 2; i++) { + int p = parents[i]; + if (p == -1) + continue; + sp = seen[p]; + if (sv < poison) { + if (sp == 0) { + seen[p] = sv; + interesting++; + } + else if (sp != sv) + seen[p] |= sv; + } else { + if (sp && sp < poison) + interesting--; + seen[p] = sv; + } + } + } + +done: + free(seen); + return gca; +bail: + free(seen); + Py_XDECREF(gca); + return NULL; +} + +/* + * Given a disjoint set of revs, return the subset with the longest + * path to the root. + */ +static PyObject *find_deepest(indexObject *self, PyObject *revs) +{ + const Py_ssize_t revcount = PyList_GET_SIZE(revs); + static const Py_ssize_t capacity = 24; + int *depth, *interesting = NULL; + int i, j, v, ninteresting; + PyObject *dict = NULL, *keys = NULL; + long *seen = NULL; + int maxrev = -1; + long final; + + if (revcount > capacity) { + PyErr_Format(PyExc_OverflowError, + "bitset size (%ld) > capacity (%ld)", + (long)revcount, (long)capacity); + return NULL; + } + + for (i = 0; i < revcount; i++) { + int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); + if (n > maxrev) + maxrev = n; + } + + depth = calloc(sizeof(*depth), maxrev + 1); + if (depth == NULL) + return PyErr_NoMemory(); + + seen = calloc(sizeof(*seen), maxrev + 1); + if (seen == NULL) { + PyErr_NoMemory(); + goto bail; + } + + interesting = calloc(sizeof(*interesting), 2 << revcount); + if (interesting == NULL) { + PyErr_NoMemory(); + goto bail; + } + + if (PyList_Sort(revs) == -1) + goto bail; + + for (i = 0; i < revcount; i++) { + int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); + long b = 1l << i; + depth[n] = 1; + seen[n] = b; + interesting[b] = 1; + } + + ninteresting = (int)revcount; + + for (v = maxrev; v >= 0 && ninteresting > 1; v--) { + int dv = depth[v]; + int parents[2]; + long sv; + + if (dv == 0) + continue; + + sv = seen[v]; + if (index_get_parents(self, v, parents, maxrev) < 0) + goto bail; + + for (i = 0; i < 2; i++) { + int p = parents[i]; + long sp; + int dp; + + if (p == -1) + continue; + + dp = depth[p]; + sp = seen[p]; + if (dp <= dv) { + depth[p] = dv + 1; + if (sp != sv) { + interesting[sv] += 1; + seen[p] = sv; + if (sp) { + interesting[sp] -= 1; + if (interesting[sp] == 0) + ninteresting -= 1; + } + } + } + else if (dv == dp - 1) { + long nsp = sp | sv; + if (nsp == sp) + continue; + seen[p] = nsp; + interesting[sp] -= 1; + if (interesting[sp] == 0 && interesting[nsp] > 0) + ninteresting -= 1; + interesting[nsp] += 1; + } + } + interesting[sv] -= 1; + if (interesting[sv] == 0) + ninteresting -= 1; + } + + final = 0; + j = ninteresting; + for (i = 0; i < (int)(2 << revcount) && j > 0; i++) { + if (interesting[i] == 0) + continue; + final |= i; + j -= 1; + } + if (final == 0) { + keys = PyList_New(0); + goto bail; + } + + dict = PyDict_New(); + if (dict == NULL) + goto bail; + + for (i = 0; i < revcount; i++) { + PyObject *key; + + if ((final & (1 << i)) == 0) + continue; + + key = PyList_GET_ITEM(revs, i); + Py_INCREF(key); + Py_INCREF(Py_None); + if (PyDict_SetItem(dict, key, Py_None) == -1) { + Py_DECREF(key); + Py_DECREF(Py_None); + goto bail; + } + } + + keys = PyDict_Keys(dict); + +bail: + free(depth); + free(seen); + free(interesting); + Py_XDECREF(dict); + + return keys; +} + +/* + * Given a (possibly overlapping) set of revs, return all the + * common ancestors heads: heads(::args[0] and ::a[1] and ...) + */ +static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args) +{ + PyObject *ret = NULL; + Py_ssize_t argcount, i, len; + bitmask repeat = 0; + int revcount = 0; + int *revs; + + argcount = PySequence_Length(args); + revs = PyMem_Malloc(argcount * sizeof(*revs)); + if (argcount > 0 && revs == NULL) + return PyErr_NoMemory(); + len = index_length(self) - 1; + + for (i = 0; i < argcount; i++) { + static const int capacity = 24; + PyObject *obj = PySequence_GetItem(args, i); + bitmask x; + long val; + + if (!PyInt_Check(obj)) { + PyErr_SetString(PyExc_TypeError, + "arguments must all be ints"); + Py_DECREF(obj); + goto bail; + } + val = PyInt_AsLong(obj); + Py_DECREF(obj); + if (val == -1) { + ret = PyList_New(0); + goto done; + } + if (val < 0 || val >= len) { + PyErr_SetString(PyExc_IndexError, + "index out of range"); + goto bail; + } + /* this cheesy bloom filter lets us avoid some more + * expensive duplicate checks in the common set-is-disjoint + * case */ + x = 1ull << (val & 0x3f); + if (repeat & x) { + int k; + for (k = 0; k < revcount; k++) { + if (val == revs[k]) + goto duplicate; + } + } + else repeat |= x; + if (revcount >= capacity) { + PyErr_Format(PyExc_OverflowError, + "bitset size (%d) > capacity (%d)", + revcount, capacity); + goto bail; + } + revs[revcount++] = (int)val; + duplicate:; + } + + if (revcount == 0) { + ret = PyList_New(0); + goto done; + } + if (revcount == 1) { + PyObject *obj; + ret = PyList_New(1); + if (ret == NULL) + goto bail; + obj = PyInt_FromLong(revs[0]); + if (obj == NULL) + goto bail; + PyList_SET_ITEM(ret, 0, obj); + goto done; + } + + ret = find_gca_candidates(self, revs, revcount); + if (ret == NULL) + goto bail; + +done: + PyMem_Free(revs); + return ret; + +bail: + PyMem_Free(revs); + Py_XDECREF(ret); + return NULL; +} + +/* + * Given a (possibly overlapping) set of revs, return the greatest + * common ancestors: those with the longest path to the root. + */ +static PyObject *index_ancestors(indexObject *self, PyObject *args) +{ + PyObject *ret; + PyObject *gca = index_commonancestorsheads(self, args); + if (gca == NULL) + return NULL; + + if (PyList_GET_SIZE(gca) <= 1) { + return gca; + } + + ret = find_deepest(self, gca); + Py_DECREF(gca); + return ret; +} + +/* + * Invalidate any trie entries introduced by added revs. + */ +static void nt_invalidate_added(indexObject *self, Py_ssize_t start) +{ + Py_ssize_t i, len = PyList_GET_SIZE(self->added); + + for (i = start; i < len; i++) { + PyObject *tuple = PyList_GET_ITEM(self->added, i); + PyObject *node = PyTuple_GET_ITEM(tuple, 7); + + nt_insert(self, PyBytes_AS_STRING(node), -1); + } + + if (start == 0) + Py_CLEAR(self->added); +} + +/* + * Delete a numeric range of revs, which must be at the end of the + * range, but exclude the sentinel nullid entry. + */ +static int index_slice_del(indexObject *self, PyObject *item) +{ + Py_ssize_t start, stop, step, slicelength; + Py_ssize_t length = index_length(self); + int ret = 0; + +/* Argument changed from PySliceObject* to PyObject* in Python 3. */ +#ifdef IS_PY3K + if (PySlice_GetIndicesEx(item, length, +#else + if (PySlice_GetIndicesEx((PySliceObject*)item, length, +#endif + &start, &stop, &step, &slicelength) < 0) + return -1; + + if (slicelength <= 0) + return 0; + + if ((step < 0 && start < stop) || (step > 0 && start > stop)) + stop = start; + + if (step < 0) { + stop = start + 1; + start = stop + step*(slicelength - 1) - 1; + step = -step; + } + + if (step != 1) { + PyErr_SetString(PyExc_ValueError, + "revlog index delete requires step size of 1"); + return -1; + } + + if (stop != length - 1) { + PyErr_SetString(PyExc_IndexError, + "revlog index deletion indices are invalid"); + return -1; + } + + if (start < self->length - 1) { + if (self->nt) { + Py_ssize_t i; + + for (i = start + 1; i < self->length - 1; i++) { + const char *node = index_node(self, i); + + if (node) + nt_insert(self, node, -1); + } + if (self->added) + nt_invalidate_added(self, 0); + if (self->ntrev > start) + self->ntrev = (int)start; + } + self->length = start + 1; + if (start < self->raw_length) { + if (self->cache) { + Py_ssize_t i; + for (i = start; i < self->raw_length; i++) + Py_CLEAR(self->cache[i]); + } + self->raw_length = start; + } + goto done; + } + + if (self->nt) { + nt_invalidate_added(self, start - self->length + 1); + if (self->ntrev > start) + self->ntrev = (int)start; + } + if (self->added) + ret = PyList_SetSlice(self->added, start - self->length + 1, + PyList_GET_SIZE(self->added), NULL); +done: + Py_CLEAR(self->headrevs); + return ret; +} + +/* + * Supported ops: + * + * slice deletion + * string assignment (extend node->rev mapping) + * string deletion (shrink node->rev mapping) + */ +static int index_assign_subscript(indexObject *self, PyObject *item, + PyObject *value) +{ + char *node; + Py_ssize_t nodelen; + long rev; + + if (PySlice_Check(item) && value == NULL) + return index_slice_del(self, item); + + if (node_check(item, &node, &nodelen) == -1) + return -1; + + if (value == NULL) + return self->nt ? nt_insert(self, node, -1) : 0; + rev = PyInt_AsLong(value); + if (rev > INT_MAX || rev < 0) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, "rev out of range"); + return -1; + } + + if (nt_init(self) == -1) + return -1; + return nt_insert(self, node, (int)rev); +} + +/* + * Find all RevlogNG entries in an index that has inline data. Update + * the optional "offsets" table with those entries. + */ +static Py_ssize_t inline_scan(indexObject *self, const char **offsets) +{ + const char *data = (const char *)self->buf.buf; + Py_ssize_t pos = 0; + Py_ssize_t end = self->buf.len; + long incr = v1_hdrsize; + Py_ssize_t len = 0; + + while (pos + v1_hdrsize <= end && pos >= 0) { + uint32_t comp_len; + /* 3rd element of header is length of compressed inline data */ + comp_len = getbe32(data + pos + 8); + incr = v1_hdrsize + comp_len; + if (offsets) + offsets[len] = data + pos; + len++; + pos += incr; + } + + if (pos != end) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, "corrupt index file"); + return -1; + } + + return len; +} + +static int index_init(indexObject *self, PyObject *args) +{ + PyObject *data_obj, *inlined_obj; + Py_ssize_t size; + + /* Initialize before argument-checking to avoid index_dealloc() crash. */ + self->raw_length = 0; + self->added = NULL; + self->cache = NULL; + self->data = NULL; + memset(&self->buf, 0, sizeof(self->buf)); + self->headrevs = NULL; + self->filteredrevs = Py_None; + Py_INCREF(Py_None); + self->nt = NULL; + self->offsets = NULL; + + if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) + return -1; + if (!PyObject_CheckBuffer(data_obj)) { + PyErr_SetString(PyExc_TypeError, + "data does not support buffer interface"); + return -1; + } + + if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1) + return -1; + size = self->buf.len; + + self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj); + self->data = data_obj; + + self->ntlength = self->ntcapacity = 0; + self->ntdepth = self->ntsplits = 0; + self->ntlookups = self->ntmisses = 0; + self->ntrev = -1; + Py_INCREF(self->data); + + if (self->inlined) { + Py_ssize_t len = inline_scan(self, NULL); + if (len == -1) + goto bail; + self->raw_length = len; + self->length = len + 1; + } else { + if (size % v1_hdrsize) { + PyErr_SetString(PyExc_ValueError, "corrupt index file"); + goto bail; + } + self->raw_length = size / v1_hdrsize; + self->length = self->raw_length + 1; + } + + return 0; +bail: + return -1; +} + +static PyObject *index_nodemap(indexObject *self) +{ + Py_INCREF(self); + return (PyObject *)self; +} + +static void index_dealloc(indexObject *self) +{ + _index_clearcaches(self); + Py_XDECREF(self->filteredrevs); + if (self->buf.buf) { + PyBuffer_Release(&self->buf); + memset(&self->buf, 0, sizeof(self->buf)); + } + Py_XDECREF(self->data); + Py_XDECREF(self->added); + PyObject_Del(self); +} + +static PySequenceMethods index_sequence_methods = { + (lenfunc)index_length, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + (ssizeargfunc)index_get, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)index_contains, /* sq_contains */ +}; + +static PyMappingMethods index_mapping_methods = { + (lenfunc)index_length, /* mp_length */ + (binaryfunc)index_getitem, /* mp_subscript */ + (objobjargproc)index_assign_subscript, /* mp_ass_subscript */ +}; + +static PyMethodDef index_methods[] = { + {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS, + "return the gca set of the given revs"}, + {"commonancestorsheads", (PyCFunction)index_commonancestorsheads, + METH_VARARGS, + "return the heads of the common ancestors of the given revs"}, + {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, + "clear the index caches"}, + {"get", (PyCFunction)index_m_get, METH_VARARGS, + "get an index entry"}, + {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, + METH_VARARGS, "compute phases"}, + {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS, + "reachableroots"}, + {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS, + "get head revisions"}, /* Can do filtering since 3.2 */ + {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS, + "get filtered head revisions"}, /* Can always do filtering */ + {"insert", (PyCFunction)index_insert, METH_VARARGS, + "insert an index entry"}, + {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, + "match a potentially ambiguous node ID"}, + {"stats", (PyCFunction)index_stats, METH_NOARGS, + "stats for the index"}, + {NULL} /* Sentinel */ +}; + +static PyGetSetDef index_getset[] = { + {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject indexType = { + PyVarObject_HEAD_INIT(NULL, 0) + "parsers.index", /* tp_name */ + sizeof(indexObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)index_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &index_sequence_methods, /* tp_as_sequence */ + &index_mapping_methods, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "revlog index", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + index_methods, /* tp_methods */ + 0, /* tp_members */ + index_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)index_init, /* tp_init */ + 0, /* tp_alloc */ +}; + +/* + * returns a tuple of the form (index, index, cache) with elements as + * follows: + * + * index: an index object that lazily parses RevlogNG records + * cache: if data is inlined, a tuple (0, index_file_content), else None + * index_file_content could be a string, or a buffer + * + * added complications are for backwards compatibility + */ +static PyObject *parse_index2(PyObject *self, PyObject *args) +{ + PyObject *tuple = NULL, *cache = NULL; + indexObject *idx; + int ret; + + idx = PyObject_New(indexObject, &indexType); + if (idx == NULL) + goto bail; + + ret = index_init(idx, args); + if (ret == -1) + goto bail; + + if (idx->inlined) { + cache = Py_BuildValue("iO", 0, idx->data); + if (cache == NULL) + goto bail; + } else { + cache = Py_None; + Py_INCREF(cache); + } + + tuple = Py_BuildValue("NN", idx, cache); + if (!tuple) + goto bail; + return tuple; + +bail: + Py_XDECREF(idx); + Py_XDECREF(cache); + Py_XDECREF(tuple); + return NULL; +} + +#define BUMPED_FIX 1 +#define USING_SHA_256 2 +#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1) + +static PyObject *readshas( + const char *source, unsigned char num, Py_ssize_t hashwidth) +{ + int i; + PyObject *list = PyTuple_New(num); + if (list == NULL) { + return NULL; + } + for (i = 0; i < num; i++) { + PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth); + if (hash == NULL) { + Py_DECREF(list); + return NULL; + } + PyTuple_SET_ITEM(list, i, hash); + source += hashwidth; + } + return list; +} + +static PyObject *fm1readmarker(const char *databegin, const char *dataend, + uint32_t *msize) +{ + const char *data = databegin; + const char *meta; + + double mtime; + int16_t tz; + uint16_t flags; + unsigned char nsuccs, nparents, nmetadata; + Py_ssize_t hashwidth = 20; + + PyObject *prec = NULL, *parents = NULL, *succs = NULL; + PyObject *metadata = NULL, *ret = NULL; + int i; + + if (data + FM1_HEADER_SIZE > dataend) { + goto overflow; + } + + *msize = getbe32(data); + data += 4; + mtime = getbefloat64(data); + data += 8; + tz = getbeint16(data); + data += 2; + flags = getbeuint16(data); + data += 2; + + if (flags & USING_SHA_256) { + hashwidth = 32; + } + + nsuccs = (unsigned char)(*data++); + nparents = (unsigned char)(*data++); + nmetadata = (unsigned char)(*data++); + + if (databegin + *msize > dataend) { + goto overflow; + } + dataend = databegin + *msize; /* narrow down to marker size */ + + if (data + hashwidth > dataend) { + goto overflow; + } + prec = PyBytes_FromStringAndSize(data, hashwidth); + data += hashwidth; + if (prec == NULL) { + goto bail; + } + + if (data + nsuccs * hashwidth > dataend) { + goto overflow; + } + succs = readshas(data, nsuccs, hashwidth); + if (succs == NULL) { + goto bail; + } + data += nsuccs * hashwidth; + + if (nparents == 1 || nparents == 2) { + if (data + nparents * hashwidth > dataend) { + goto overflow; + } + parents = readshas(data, nparents, hashwidth); + if (parents == NULL) { + goto bail; + } + data += nparents * hashwidth; + } else { + parents = Py_None; + Py_INCREF(parents); + } + + if (data + 2 * nmetadata > dataend) { + goto overflow; + } + meta = data + (2 * nmetadata); + metadata = PyTuple_New(nmetadata); + if (metadata == NULL) { + goto bail; + } + for (i = 0; i < nmetadata; i++) { + PyObject *tmp, *left = NULL, *right = NULL; + Py_ssize_t leftsize = (unsigned char)(*data++); + Py_ssize_t rightsize = (unsigned char)(*data++); + if (meta + leftsize + rightsize > dataend) { + goto overflow; + } + left = PyBytes_FromStringAndSize(meta, leftsize); + meta += leftsize; + right = PyBytes_FromStringAndSize(meta, rightsize); + meta += rightsize; + tmp = PyTuple_New(2); + if (!left || !right || !tmp) { + Py_XDECREF(left); + Py_XDECREF(right); + Py_XDECREF(tmp); + goto bail; + } + PyTuple_SET_ITEM(tmp, 0, left); + PyTuple_SET_ITEM(tmp, 1, right); + PyTuple_SET_ITEM(metadata, i, tmp); + } + ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, + metadata, mtime, (int)tz * 60, parents); + goto bail; /* return successfully */ + +overflow: + PyErr_SetString(PyExc_ValueError, "overflow in obsstore"); +bail: + Py_XDECREF(prec); + Py_XDECREF(succs); + Py_XDECREF(metadata); + Py_XDECREF(parents); + return ret; +} + + +static PyObject *fm1readmarkers(PyObject *self, PyObject *args) { + const char *data, *dataend; + int datalen; + Py_ssize_t offset, stop; + PyObject *markers = NULL; + + if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) { + return NULL; + } + dataend = data + datalen; + data += offset; + markers = PyList_New(0); + if (!markers) { + return NULL; + } + while (offset < stop) { + uint32_t msize; + int error; + PyObject *record = fm1readmarker(data, dataend, &msize); + if (!record) { + goto bail; + } + error = PyList_Append(markers, record); + Py_DECREF(record); + if (error) { + goto bail; + } + data += msize; + offset += msize; + } + return markers; +bail: + Py_DECREF(markers); + return NULL; +} + +static char parsers_doc[] = "Efficient content parsing."; + +PyObject *encodedir(PyObject *self, PyObject *args); +PyObject *pathencode(PyObject *self, PyObject *args); +PyObject *lowerencode(PyObject *self, PyObject *args); + +static PyMethodDef methods[] = { + {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"}, + {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS, + "create a set containing non-normal and other parent entries of given " + "dirstate\n"}, + {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, + {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, + {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"}, + {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"}, + {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"}, + {"dict_new_presized", dict_new_presized, METH_VARARGS, + "construct a dict with an expected size\n"}, + {"make_file_foldmap", make_file_foldmap, METH_VARARGS, + "make file foldmap\n"}, + {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"}, + {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"}, + {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"}, + {"fm1readmarkers", fm1readmarkers, METH_VARARGS, + "parse v1 obsolete markers\n"}, + {NULL, NULL} +}; + +void dirs_module_init(PyObject *mod); +void manifest_module_init(PyObject *mod); + +static const int version = 1; + +static void module_init(PyObject *mod) +{ + PyModule_AddIntConstant(mod, "version", version); + + /* This module constant has two purposes. First, it lets us unit test + * the ImportError raised without hard-coding any error text. This + * means we can change the text in the future without breaking tests, + * even across changesets without a recompile. Second, its presence + * can be used to determine whether the version-checking logic is + * present, which also helps in testing across changesets without a + * recompile. Note that this means the pure-Python version of parsers + * should not have this module constant. */ + PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext); + + dirs_module_init(mod); + manifest_module_init(mod); + + indexType.tp_new = PyType_GenericNew; + if (PyType_Ready(&indexType) < 0 || + PyType_Ready(&dirstateTupleType) < 0) + return; + Py_INCREF(&indexType); + PyModule_AddObject(mod, "index", (PyObject *)&indexType); + Py_INCREF(&dirstateTupleType); + PyModule_AddObject(mod, "dirstatetuple", + (PyObject *)&dirstateTupleType); + + nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0, + -1, -1, -1, -1, nullid, 20); + if (nullentry) + PyObject_GC_UnTrack(nullentry); +} + +static int check_python_version(void) +{ + PyObject *sys = PyImport_ImportModule("sys"), *ver; + long hexversion; + if (!sys) + return -1; + ver = PyObject_GetAttrString(sys, "hexversion"); + Py_DECREF(sys); + if (!ver) + return -1; + hexversion = PyInt_AsLong(ver); + Py_DECREF(ver); + /* sys.hexversion is a 32-bit number by default, so the -1 case + * should only occur in unusual circumstances (e.g. if sys.hexversion + * is manually set to an invalid value). */ + if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) { + PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension " + "modules were compiled with Python " PY_VERSION ", but " + "Mercurial is currently using Python with sys.hexversion=%ld: " + "Python %s\n at: %s", versionerrortext, hexversion, + Py_GetVersion(), Py_GetProgramFullPath()); + return -1; + } + return 0; +} + +#ifdef IS_PY3K +static struct PyModuleDef parsers_module = { + PyModuleDef_HEAD_INIT, + "parsers", + parsers_doc, + -1, + methods +}; + +PyMODINIT_FUNC PyInit_parsers(void) +{ + PyObject *mod; + + if (check_python_version() == -1) + return NULL; + mod = PyModule_Create(&parsers_module); + module_init(mod); + return mod; +} +#else +PyMODINIT_FUNC initparsers(void) +{ + PyObject *mod; + + if (check_python_version() == -1) + return; + mod = Py_InitModule3("parsers", methods, parsers_doc); + module_init(mod); +} +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/cext/pathencode.c Sat Aug 13 12:23:56 2016 +0900 @@ -0,0 +1,765 @@ +/* + pathencode.c - efficient path name encoding + + Copyright 2012 Facebook + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. +*/ + +/* + * An implementation of the name encoding scheme used by the fncache + * store. The common case is of a path < 120 bytes long, which is + * handled either in a single pass with no allocations or two passes + * with a single allocation. For longer paths, multiple passes are + * required. + */ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "util.h" + +/* state machine for the fast path */ +enum path_state { + START, /* first byte of a path component */ + A, /* "AUX" */ + AU, + THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */ + C, /* "CON" or "COMn" */ + CO, + COMLPT, /* "COM" or "LPT" */ + COMLPTn, + L, + LP, + N, + NU, + P, /* "PRN" */ + PR, + LDOT, /* leading '.' */ + DOT, /* '.' in a non-leading position */ + H, /* ".h" */ + HGDI, /* ".hg", ".d", or ".i" */ + SPACE, + DEFAULT /* byte of a path component after the first */ +}; + +/* state machine for dir-encoding */ +enum dir_state { + DDOT, + DH, + DHGDI, + DDEFAULT +}; + +static inline int inset(const uint32_t bitset[], char c) +{ + return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31)); +} + +static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize, + char c) +{ + if (dest) { + assert(*destlen < destsize); + dest[*destlen] = c; + } + (*destlen)++; +} + +static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize, + const void *src, Py_ssize_t len) +{ + if (dest) { + assert(*destlen + len < destsize); + memcpy((void *)&dest[*destlen], src, len); + } + *destlen += len; +} + +static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize, + uint8_t c) +{ + static const char hexdigit[] = "0123456789abcdef"; + + charcopy(dest, destlen, destsize, hexdigit[c >> 4]); + charcopy(dest, destlen, destsize, hexdigit[c & 15]); +} + +/* 3-byte escape: tilde followed by two hex digits */ +static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize, + char c) +{ + charcopy(dest, destlen, destsize, '~'); + hexencode(dest, destlen, destsize, c); +} + +static Py_ssize_t _encodedir(char *dest, size_t destsize, + const char *src, Py_ssize_t len) +{ + enum dir_state state = DDEFAULT; + Py_ssize_t i = 0, destlen = 0; + + while (i < len) { + switch (state) { + case DDOT: + switch (src[i]) { + case 'd': + case 'i': + state = DHGDI; + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'h': + state = DH; + charcopy(dest, &destlen, destsize, src[i++]); + break; + default: + state = DDEFAULT; + break; + } + break; + case DH: + if (src[i] == 'g') { + state = DHGDI; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DDEFAULT; + break; + case DHGDI: + if (src[i] == '/') { + memcopy(dest, &destlen, destsize, ".hg", 3); + charcopy(dest, &destlen, destsize, src[i++]); + } + state = DDEFAULT; + break; + case DDEFAULT: + if (src[i] == '.') + state = DDOT; + charcopy(dest, &destlen, destsize, src[i++]); + break; + } + } + + return destlen; +} + +PyObject *encodedir(PyObject *self, PyObject *args) +{ + Py_ssize_t len, newlen; + PyObject *pathobj, *newobj; + char *path; + + if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) + return NULL; + + if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) { + PyErr_SetString(PyExc_TypeError, "expected a string"); + return NULL; + } + + newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1; + + if (newlen == len + 1) { + Py_INCREF(pathobj); + return pathobj; + } + + newobj = PyBytes_FromStringAndSize(NULL, newlen); + + if (newobj) { + assert(PyBytes_Check(newobj)); + Py_SIZE(newobj)--; + _encodedir(PyBytes_AS_STRING(newobj), newlen, path, + len + 1); + } + + return newobj; +} + +static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8], + char *dest, Py_ssize_t destlen, size_t destsize, + const char *src, Py_ssize_t len, + int encodedir) +{ + enum path_state state = START; + Py_ssize_t i = 0; + + /* + * Python strings end with a zero byte, which we use as a + * terminal token as they are not valid inside path names. + */ + + while (i < len) { + switch (state) { + case START: + switch (src[i]) { + case '/': + charcopy(dest, &destlen, destsize, src[i++]); + break; + case '.': + state = LDOT; + escape3(dest, &destlen, destsize, src[i++]); + break; + case ' ': + state = DEFAULT; + escape3(dest, &destlen, destsize, src[i++]); + break; + case 'a': + state = A; + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'c': + state = C; + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'l': + state = L; + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'n': + state = N; + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'p': + state = P; + charcopy(dest, &destlen, destsize, src[i++]); + break; + default: + state = DEFAULT; + break; + } + break; + case A: + if (src[i] == 'u') { + state = AU; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case AU: + if (src[i] == 'x') { + state = THIRD; + i++; + } + else state = DEFAULT; + break; + case THIRD: + state = DEFAULT; + switch (src[i]) { + case '.': + case '/': + case '\0': + escape3(dest, &destlen, destsize, src[i - 1]); + break; + default: + i--; + break; + } + break; + case C: + if (src[i] == 'o') { + state = CO; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case CO: + if (src[i] == 'm') { + state = COMLPT; + i++; + } + else if (src[i] == 'n') { + state = THIRD; + i++; + } + else state = DEFAULT; + break; + case COMLPT: + switch (src[i]) { + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + state = COMLPTn; + i++; + break; + default: + state = DEFAULT; + charcopy(dest, &destlen, destsize, src[i - 1]); + break; + } + break; + case COMLPTn: + state = DEFAULT; + switch (src[i]) { + case '.': + case '/': + case '\0': + escape3(dest, &destlen, destsize, src[i - 2]); + charcopy(dest, &destlen, destsize, src[i - 1]); + break; + default: + memcopy(dest, &destlen, destsize, + &src[i - 2], 2); + break; + } + break; + case L: + if (src[i] == 'p') { + state = LP; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case LP: + if (src[i] == 't') { + state = COMLPT; + i++; + } + else state = DEFAULT; + break; + case N: + if (src[i] == 'u') { + state = NU; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case NU: + if (src[i] == 'l') { + state = THIRD; + i++; + } + else state = DEFAULT; + break; + case P: + if (src[i] == 'r') { + state = PR; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case PR: + if (src[i] == 'n') { + state = THIRD; + i++; + } + else state = DEFAULT; + break; + case LDOT: + switch (src[i]) { + case 'd': + case 'i': + state = HGDI; + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'h': + state = H; + charcopy(dest, &destlen, destsize, src[i++]); + break; + default: + state = DEFAULT; + break; + } + break; + case DOT: + switch (src[i]) { + case '/': + case '\0': + state = START; + memcopy(dest, &destlen, destsize, "~2e", 3); + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'd': + case 'i': + state = HGDI; + charcopy(dest, &destlen, destsize, '.'); + charcopy(dest, &destlen, destsize, src[i++]); + break; + case 'h': + state = H; + memcopy(dest, &destlen, destsize, ".h", 2); + i++; + break; + default: + state = DEFAULT; + charcopy(dest, &destlen, destsize, '.'); + break; + } + break; + case H: + if (src[i] == 'g') { + state = HGDI; + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case HGDI: + if (src[i] == '/') { + state = START; + if (encodedir) + memcopy(dest, &destlen, destsize, ".hg", + 3); + charcopy(dest, &destlen, destsize, src[i++]); + } + else state = DEFAULT; + break; + case SPACE: + switch (src[i]) { + case '/': + case '\0': + state = START; + memcopy(dest, &destlen, destsize, "~20", 3); + charcopy(dest, &destlen, destsize, src[i++]); + break; + default: + state = DEFAULT; + charcopy(dest, &destlen, destsize, ' '); + break; + } + break; + case DEFAULT: + while (inset(onebyte, src[i])) { + charcopy(dest, &destlen, destsize, src[i++]); + if (i == len) + goto done; + } + switch (src[i]) { + case '.': + state = DOT; + i++; + break; + case ' ': + state = SPACE; + i++; + break; + case '/': + state = START; + charcopy(dest, &destlen, destsize, '/'); + i++; + break; + default: + if (inset(onebyte, src[i])) { + do { + charcopy(dest, &destlen, + destsize, src[i++]); + } while (i < len && + inset(onebyte, src[i])); + } + else if (inset(twobytes, src[i])) { + char c = src[i++]; + charcopy(dest, &destlen, destsize, '_'); + charcopy(dest, &destlen, destsize, + c == '_' ? '_' : c + 32); + } + else + escape3(dest, &destlen, destsize, + src[i++]); + break; + } + break; + } + } +done: + return destlen; +} + +static Py_ssize_t basicencode(char *dest, size_t destsize, + const char *src, Py_ssize_t len) +{ + static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe }; + + static const uint32_t onebyte[8] = { + 1, 0x2bff3bfa, 0x68000001, 0x2fffffff, + }; + + Py_ssize_t destlen = 0; + + return _encode(twobytes, onebyte, dest, destlen, destsize, + src, len, 1); +} + +static const Py_ssize_t maxstorepathlen = 120; + +static Py_ssize_t _lowerencode(char *dest, size_t destsize, + const char *src, Py_ssize_t len) +{ + static const uint32_t onebyte[8] = { + 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff + }; + + static const uint32_t lower[8] = { 0, 0, 0x7fffffe }; + + Py_ssize_t i, destlen = 0; + + for (i = 0; i < len; i++) { + if (inset(onebyte, src[i])) + charcopy(dest, &destlen, destsize, src[i]); + else if (inset(lower, src[i])) + charcopy(dest, &destlen, destsize, src[i] + 32); + else + escape3(dest, &destlen, destsize, src[i]); + } + + return destlen; +} + +PyObject *lowerencode(PyObject *self, PyObject *args) +{ + char *path; + Py_ssize_t len, newlen; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len)) + return NULL; + + newlen = _lowerencode(NULL, 0, path, len); + ret = PyBytes_FromStringAndSize(NULL, newlen); + if (ret) + _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len); + + return ret; +} + +/* See store.py:_auxencode for a description. */ +static Py_ssize_t auxencode(char *dest, size_t destsize, + const char *src, Py_ssize_t len) +{ + static const uint32_t twobytes[8]; + + static const uint32_t onebyte[8] = { + ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, + }; + + return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0); +} + +static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20]) +{ + static const Py_ssize_t dirprefixlen = 8; + static const Py_ssize_t maxshortdirslen = 68; + char *dest; + PyObject *ret; + + Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1; + Py_ssize_t destsize, destlen = 0, slop, used; + + while (lastslash >= 0 && src[lastslash] != '/') { + if (src[lastslash] == '.' && lastdot == -1) + lastdot = lastslash; + lastslash--; + } + +#if 0 + /* All paths should end in a suffix of ".i" or ".d". + Unfortunately, the file names in test-hybridencode.py + violate this rule. */ + if (lastdot != len - 3) { + PyErr_SetString(PyExc_ValueError, + "suffix missing or wrong length"); + return NULL; + } +#endif + + /* If src contains a suffix, we will append it to the end of + the new string, so make room. */ + destsize = 120; + if (lastdot >= 0) + destsize += len - lastdot - 1; + + ret = PyBytes_FromStringAndSize(NULL, destsize); + if (ret == NULL) + return NULL; + + dest = PyBytes_AS_STRING(ret); + memcopy(dest, &destlen, destsize, "dh/", 3); + + /* Copy up to dirprefixlen bytes of each path component, up to + a limit of maxshortdirslen bytes. */ + for (i = d = p = 0; i < lastslash; i++, p++) { + if (src[i] == '/') { + char d = dest[destlen - 1]; + /* After truncation, a directory name may end + in a space or dot, which are unportable. */ + if (d == '.' || d == ' ') + dest[destlen - 1] = '_'; + /* The + 3 is to account for "dh/" in the beginning */ + if (destlen > maxshortdirslen + 3) + break; + charcopy(dest, &destlen, destsize, src[i]); + p = -1; + } + else if (p < dirprefixlen) + charcopy(dest, &destlen, destsize, src[i]); + } + + /* Rewind to just before the last slash copied. */ + if (destlen > maxshortdirslen + 3) + do { + destlen--; + } while (destlen > 0 && dest[destlen] != '/'); + + if (destlen > 3) { + if (lastslash > 0) { + char d = dest[destlen - 1]; + /* The last directory component may be + truncated, so make it safe. */ + if (d == '.' || d == ' ') + dest[destlen - 1] = '_'; + } + + charcopy(dest, &destlen, destsize, '/'); + } + + /* Add a prefix of the original file's name. Its length + depends on the number of bytes left after accounting for + hash and suffix. */ + used = destlen + 40; + if (lastdot >= 0) + used += len - lastdot - 1; + slop = maxstorepathlen - used; + if (slop > 0) { + Py_ssize_t basenamelen = + lastslash >= 0 ? len - lastslash - 2 : len - 1; + + if (basenamelen > slop) + basenamelen = slop; + if (basenamelen > 0) + memcopy(dest, &destlen, destsize, &src[lastslash + 1], + basenamelen); + } + + /* Add hash and suffix. */ + for (i = 0; i < 20; i++) + hexencode(dest, &destlen, destsize, sha[i]); + + if (lastdot >= 0) + memcopy(dest, &destlen, destsize, &src[lastdot], + len - lastdot - 1); + + assert(PyBytes_Check(ret)); + Py_SIZE(ret) = destlen; + + return ret; +} + +/* + * Avoiding a trip through Python would improve performance by 50%, + * but we don't encounter enough long names to be worth the code. + */ +static int sha1hash(char hash[20], const char *str, Py_ssize_t len) +{ + static PyObject *shafunc; + PyObject *shaobj, *hashobj; + + if (shafunc == NULL) { + PyObject *hashlib, *name = PyBytes_FromString("hashlib"); + + if (name == NULL) + return -1; + + hashlib = PyImport_Import(name); + Py_DECREF(name); + + if (hashlib == NULL) { + PyErr_SetString(PyExc_ImportError, "hashlib"); + return -1; + } + shafunc = PyObject_GetAttrString(hashlib, "sha1"); + Py_DECREF(hashlib); + + if (shafunc == NULL) { + PyErr_SetString(PyExc_AttributeError, + "module 'hashlib' has no " + "attribute 'sha1'"); + return -1; + } + } + + shaobj = PyObject_CallFunction(shafunc, "s#", str, len); + + if (shaobj == NULL) + return -1; + + hashobj = PyObject_CallMethod(shaobj, "digest", ""); + Py_DECREF(shaobj); + if (hashobj == NULL) + return -1; + + if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) { + PyErr_SetString(PyExc_TypeError, + "result of digest is not a 20-byte hash"); + Py_DECREF(hashobj); + return -1; + } + + memcpy(hash, PyBytes_AS_STRING(hashobj), 20); + Py_DECREF(hashobj); + return 0; +} + +#define MAXENCODE 4096 * 4 + +static PyObject *hashencode(const char *src, Py_ssize_t len) +{ + char dired[MAXENCODE]; + char lowered[MAXENCODE]; + char auxed[MAXENCODE]; + Py_ssize_t dirlen, lowerlen, auxlen, baselen; + char sha[20]; + + baselen = (len - 5) * 3; + if (baselen >= MAXENCODE) { + PyErr_SetString(PyExc_ValueError, "string too long"); + return NULL; + } + + dirlen = _encodedir(dired, baselen, src, len); + if (sha1hash(sha, dired, dirlen - 1) == -1) + return NULL; + lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5); + auxlen = auxencode(auxed, baselen, lowered, lowerlen); + return hashmangle(auxed, auxlen, sha); +} + +PyObject *pathencode(PyObject *self, PyObject *args) +{ + Py_ssize_t len, newlen; + PyObject *pathobj, *newobj; + char *path; + + if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) + return NULL; + + if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) { + PyErr_SetString(PyExc_TypeError, "expected a string"); + return NULL; + } + + if (len > maxstorepathlen) + newlen = maxstorepathlen + 2; + else + newlen = len ? basicencode(NULL, 0, path, len + 1) : 1; + + if (newlen <= maxstorepathlen + 1) { + if (newlen == len + 1) { + Py_INCREF(pathobj); + return pathobj; + } + + newobj = PyBytes_FromStringAndSize(NULL, newlen); + + if (newobj) { + assert(PyBytes_Check(newobj)); + Py_SIZE(newobj)--; + basicencode(PyBytes_AS_STRING(newobj), newlen, path, + len + 1); + } + } + else + newobj = hashencode(path, len + 1); + + return newobj; +}
--- a/mercurial/dirs.c Sat Aug 13 12:18:58 2016 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,315 +0,0 @@ -/* - dirs.c - dynamic directory diddling for dirstates - - Copyright 2013 Facebook - - This software may be used and distributed according to the terms of - the GNU General Public License, incorporated herein by reference. -*/ - -#define PY_SSIZE_T_CLEAN -#include <Python.h> -#include "util.h" - -#ifdef IS_PY3K -#define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1] -#else -#define PYLONG_VALUE(o) PyInt_AS_LONG(o) -#endif - -/* - * This is a multiset of directory names, built from the files that - * appear in a dirstate or manifest. - * - * A few implementation notes: - * - * We modify Python integers for refcounting, but those integers are - * never visible to Python code. - * - * We mutate strings in-place, but leave them immutable once they can - * be seen by Python code. - */ -typedef struct { - PyObject_HEAD - PyObject *dict; -} dirsObject; - -static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos) -{ - while (pos != -1) { - if (path[pos] == '/') - break; - pos -= 1; - } - - return pos; -} - -static int _addpath(PyObject *dirs, PyObject *path) -{ - const char *cpath = PyBytes_AS_STRING(path); - Py_ssize_t pos = PyBytes_GET_SIZE(path); - PyObject *key = NULL; - int ret = -1; - - /* This loop is super critical for performance. That's why we inline - * access to Python structs instead of going through a supported API. - * The implementation, therefore, is heavily dependent on CPython - * implementation details. We also commit violations of the Python - * "protocol" such as mutating immutable objects. But since we only - * mutate objects created in this function or in other well-defined - * locations, the references are known so these violations should go - * unnoticed. The code for adjusting the length of a PyBytesObject is - * essentially a minimal version of _PyBytes_Resize. */ - while ((pos = _finddir(cpath, pos - 1)) != -1) { - PyObject *val; - - /* It's likely that every prefix already has an entry - in our dict. Try to avoid allocating and - deallocating a string for each prefix we check. */ - if (key != NULL) - ((PyBytesObject *)key)->ob_shash = -1; - else { - /* Force Python to not reuse a small shared string. */ - key = PyBytes_FromStringAndSize(cpath, - pos < 2 ? 2 : pos); - if (key == NULL) - goto bail; - } - /* Py_SIZE(o) refers to the ob_size member of the struct. Yes, - * assigning to what looks like a function seems wrong. */ - Py_SIZE(key) = pos; - ((PyBytesObject *)key)->ob_sval[pos] = '\0'; - - val = PyDict_GetItem(dirs, key); - if (val != NULL) { - PYLONG_VALUE(val) += 1; - break; - } - - /* Force Python to not reuse a small shared int. */ -#ifdef IS_PY3K - val = PyLong_FromLong(0x1eadbeef); -#else - val = PyInt_FromLong(0x1eadbeef); -#endif - - if (val == NULL) - goto bail; - - PYLONG_VALUE(val) = 1; - ret = PyDict_SetItem(dirs, key, val); - Py_DECREF(val); - if (ret == -1) - goto bail; - Py_CLEAR(key); - } - ret = 0; - -bail: - Py_XDECREF(key); - - return ret; -} - -static int _delpath(PyObject *dirs, PyObject *path) -{ - char *cpath = PyBytes_AS_STRING(path); - Py_ssize_t pos = PyBytes_GET_SIZE(path); - PyObject *key = NULL; - int ret = -1; - - while ((pos = _finddir(cpath, pos - 1)) != -1) { - PyObject *val; - - key = PyBytes_FromStringAndSize(cpath, pos); - - if (key == NULL) - goto bail; - - val = PyDict_GetItem(dirs, key); - if (val == NULL) { - PyErr_SetString(PyExc_ValueError, - "expected a value, found none"); - goto bail; - } - - if (--PYLONG_VALUE(val) <= 0) { - if (PyDict_DelItem(dirs, key) == -1) - goto bail; - } else - break; - Py_CLEAR(key); - } - ret = 0; - -bail: - Py_XDECREF(key); - - return ret; -} - -static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar) -{ - PyObject *key, *value; - Py_ssize_t pos = 0; - - while (PyDict_Next(source, &pos, &key, &value)) { - if (!PyBytes_Check(key)) { - PyErr_SetString(PyExc_TypeError, "expected string key"); - return -1; - } - if (skipchar) { - if (!dirstate_tuple_check(value)) { - PyErr_SetString(PyExc_TypeError, - "expected a dirstate tuple"); - return -1; - } - if (((dirstateTupleObject *)value)->state == skipchar) - continue; - } - - if (_addpath(dirs, key) == -1) - return -1; - } - - return 0; -} - -static int dirs_fromiter(PyObject *dirs, PyObject *source) -{ - PyObject *iter, *item = NULL; - int ret; - - iter = PyObject_GetIter(source); - if (iter == NULL) - return -1; - - while ((item = PyIter_Next(iter)) != NULL) { - if (!PyBytes_Check(item)) { - PyErr_SetString(PyExc_TypeError, "expected string"); - break; - } - - if (_addpath(dirs, item) == -1) - break; - Py_CLEAR(item); - } - - ret = PyErr_Occurred() ? -1 : 0; - Py_DECREF(iter); - Py_XDECREF(item); - return ret; -} - -/* - * Calculate a refcounted set of directory names for the files in a - * dirstate. - */ -static int dirs_init(dirsObject *self, PyObject *args) -{ - PyObject *dirs = NULL, *source = NULL; - char skipchar = 0; - int ret = -1; - - self->dict = NULL; - - if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar)) - return -1; - - dirs = PyDict_New(); - - if (dirs == NULL) - return -1; - - if (source == NULL) - ret = 0; - else if (PyDict_Check(source)) - ret = dirs_fromdict(dirs, source, skipchar); - else if (skipchar) - PyErr_SetString(PyExc_ValueError, - "skip character is only supported " - "with a dict source"); - else - ret = dirs_fromiter(dirs, source); - - if (ret == -1) - Py_XDECREF(dirs); - else - self->dict = dirs; - - return ret; -} - -PyObject *dirs_addpath(dirsObject *self, PyObject *args) -{ - PyObject *path; - - if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path)) - return NULL; - - if (_addpath(self->dict, path) == -1) - return NULL; - - Py_RETURN_NONE; -} - -static PyObject *dirs_delpath(dirsObject *self, PyObject *args) -{ - PyObject *path; - - if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path)) - return NULL; - - if (_delpath(self->dict, path) == -1) - return NULL; - - Py_RETURN_NONE; -} - -static int dirs_contains(dirsObject *self, PyObject *value) -{ - return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0; -} - -static void dirs_dealloc(dirsObject *self) -{ - Py_XDECREF(self->dict); - PyObject_Del(self); -} - -static PyObject *dirs_iter(dirsObject *self) -{ - return PyObject_GetIter(self->dict); -} - -static PySequenceMethods dirs_sequence_methods; - -static PyMethodDef dirs_methods[] = { - {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"}, - {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) }; - -void dirs_module_init(PyObject *mod) -{ - dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains; - dirsType.tp_name = "parsers.dirs"; - dirsType.tp_new = PyType_GenericNew; - dirsType.tp_basicsize = sizeof(dirsObject); - dirsType.tp_dealloc = (destructor)dirs_dealloc; - dirsType.tp_as_sequence = &dirs_sequence_methods; - dirsType.tp_flags = Py_TPFLAGS_DEFAULT; - dirsType.tp_doc = "dirs"; - dirsType.tp_iter = (getiterfunc)dirs_iter; - dirsType.tp_methods = dirs_methods; - dirsType.tp_init = (initproc)dirs_init; - - if (PyType_Ready(&dirsType) < 0) - return; - Py_INCREF(&dirsType); - - PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType); -}
--- a/mercurial/dirstate.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/dirstate.py Sat Aug 13 12:23:56 2016 +0900 @@ -19,14 +19,16 @@ encoding, error, match as matchmod, - parsers, pathutil, + policy, pycompat, scmutil, txnutil, util, ) +parsers = policy.importmod(r'parsers') + propertycache = util.propertycache filecache = scmutil.filecache _rangemask = 0x7fffffff
--- a/mercurial/encoding.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/encoding.py Sat Aug 13 12:23:56 2016 +0900 @@ -14,6 +14,7 @@ from . import ( error, + policy, pycompat, ) @@ -318,7 +319,7 @@ def asciilower(s): # delay importing avoids cyclic dependency around "parsers" in # pure Python build (util => i18n => encoding => parsers => util) - from . import parsers + parsers = policy.importmod(r'parsers') impl = getattr(parsers, 'asciilower', _asciilower) global asciilower asciilower = impl @@ -334,7 +335,7 @@ def asciiupper(s): # delay importing avoids cyclic dependency around "parsers" in # pure Python build (util => i18n => encoding => parsers => util) - from . import parsers + parsers = policy.importmod(r'parsers') impl = getattr(parsers, 'asciiupper', _asciiupper) global asciiupper asciiupper = impl
--- a/mercurial/manifest.c Sat Aug 13 12:18:58 2016 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,939 +0,0 @@ -/* - * manifest.c - manifest type that does on-demand parsing. - * - * Copyright 2015, Google Inc. - * - * This software may be used and distributed according to the terms of - * the GNU General Public License, incorporated herein by reference. - */ -#include <Python.h> - -#include <assert.h> -#include <string.h> -#include <stdlib.h> - -#include "util.h" - -#define DEFAULT_LINES 100000 - -typedef struct { - char *start; - Py_ssize_t len; /* length of line including terminal newline */ - char hash_suffix; - bool from_malloc; - bool deleted; -} line; - -typedef struct { - PyObject_HEAD - PyObject *pydata; - line *lines; - int numlines; /* number of line entries */ - int livelines; /* number of non-deleted lines */ - int maxlines; /* allocated number of lines */ - bool dirty; -} lazymanifest; - -#define MANIFEST_OOM -1 -#define MANIFEST_NOT_SORTED -2 -#define MANIFEST_MALFORMED -3 - -/* defined in parsers.c */ -PyObject *unhexlify(const char *str, int len); - -/* get the length of the path for a line */ -static size_t pathlen(line *l) { - return strlen(l->start); -} - -/* get the node value of a single line */ -static PyObject *nodeof(line *l) { - char *s = l->start; - ssize_t llen = pathlen(l); - PyObject *hash = unhexlify(s + llen + 1, 40); - if (!hash) { - return NULL; - } - if (l->hash_suffix != '\0') { - char newhash[21]; - memcpy(newhash, PyBytes_AsString(hash), 20); - Py_DECREF(hash); - newhash[20] = l->hash_suffix; - hash = PyBytes_FromStringAndSize(newhash, 21); - } - return hash; -} - -/* get the node hash and flags of a line as a tuple */ -static PyObject *hashflags(line *l) -{ - char *s = l->start; - size_t plen = pathlen(l); - PyObject *hash = nodeof(l); - - /* 40 for hash, 1 for null byte, 1 for newline */ - size_t hplen = plen + 42; - Py_ssize_t flen = l->len - hplen; - PyObject *flags; - PyObject *tup; - - if (!hash) - return NULL; - flags = PyBytes_FromStringAndSize(s + hplen - 1, flen); - if (!flags) { - Py_DECREF(hash); - return NULL; - } - tup = PyTuple_Pack(2, hash, flags); - Py_DECREF(flags); - Py_DECREF(hash); - return tup; -} - -/* if we're about to run out of space in the line index, add more */ -static bool realloc_if_full(lazymanifest *self) -{ - if (self->numlines == self->maxlines) { - self->maxlines *= 2; - self->lines = realloc(self->lines, self->maxlines * sizeof(line)); - } - return !!self->lines; -} - -/* - * Find the line boundaries in the manifest that 'data' points to and store - * information about each line in 'self'. - */ -static int find_lines(lazymanifest *self, char *data, Py_ssize_t len) -{ - char *prev = NULL; - while (len > 0) { - line *l; - char *next = memchr(data, '\n', len); - if (!next) { - return MANIFEST_MALFORMED; - } - next++; /* advance past newline */ - if (!realloc_if_full(self)) { - return MANIFEST_OOM; /* no memory */ - } - if (prev && strcmp(prev, data) > -1) { - /* This data isn't sorted, so we have to abort. */ - return MANIFEST_NOT_SORTED; - } - l = self->lines + ((self->numlines)++); - l->start = data; - l->len = next - data; - l->hash_suffix = '\0'; - l->from_malloc = false; - l->deleted = false; - len = len - l->len; - prev = data; - data = next; - } - self->livelines = self->numlines; - return 0; -} - -static int lazymanifest_init(lazymanifest *self, PyObject *args) -{ - char *data; - Py_ssize_t len; - int err, ret; - PyObject *pydata; - if (!PyArg_ParseTuple(args, "S", &pydata)) { - return -1; - } - err = PyBytes_AsStringAndSize(pydata, &data, &len); - - self->dirty = false; - if (err == -1) - return -1; - self->pydata = pydata; - Py_INCREF(self->pydata); - Py_BEGIN_ALLOW_THREADS - self->lines = malloc(DEFAULT_LINES * sizeof(line)); - self->maxlines = DEFAULT_LINES; - self->numlines = 0; - if (!self->lines) - ret = MANIFEST_OOM; - else - ret = find_lines(self, data, len); - Py_END_ALLOW_THREADS - switch (ret) { - case 0: - break; - case MANIFEST_OOM: - PyErr_NoMemory(); - break; - case MANIFEST_NOT_SORTED: - PyErr_Format(PyExc_ValueError, - "Manifest lines not in sorted order."); - break; - case MANIFEST_MALFORMED: - PyErr_Format(PyExc_ValueError, - "Manifest did not end in a newline."); - break; - default: - PyErr_Format(PyExc_ValueError, - "Unknown problem parsing manifest."); - } - return ret == 0 ? 0 : -1; -} - -static void lazymanifest_dealloc(lazymanifest *self) -{ - /* free any extra lines we had to allocate */ - int i; - for (i = 0; i < self->numlines; i++) { - if (self->lines[i].from_malloc) { - free(self->lines[i].start); - } - } - if (self->lines) { - free(self->lines); - self->lines = NULL; - } - if (self->pydata) { - Py_DECREF(self->pydata); - self->pydata = NULL; - } - PyObject_Del(self); -} - -/* iteration support */ - -typedef struct { - PyObject_HEAD lazymanifest *m; - Py_ssize_t pos; -} lmIter; - -static void lmiter_dealloc(PyObject *o) -{ - lmIter *self = (lmIter *)o; - Py_DECREF(self->m); - PyObject_Del(self); -} - -static line *lmiter_nextline(lmIter *self) -{ - do { - self->pos++; - if (self->pos >= self->m->numlines) { - return NULL; - } - /* skip over deleted manifest entries */ - } while (self->m->lines[self->pos].deleted); - return self->m->lines + self->pos; -} - -static PyObject *lmiter_iterentriesnext(PyObject *o) -{ - size_t pl; - line *l; - Py_ssize_t consumed; - PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; - l = lmiter_nextline((lmIter *)o); - if (!l) { - goto done; - } - pl = pathlen(l); - path = PyBytes_FromStringAndSize(l->start, pl); - hash = nodeof(l); - consumed = pl + 41; - flags = PyBytes_FromStringAndSize(l->start + consumed, - l->len - consumed - 1); - if (!path || !hash || !flags) { - goto done; - } - ret = PyTuple_Pack(3, path, hash, flags); -done: - Py_XDECREF(path); - Py_XDECREF(hash); - Py_XDECREF(flags); - return ret; -} - -#ifdef IS_PY3K -#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT -#else -#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \ - | Py_TPFLAGS_HAVE_ITER -#endif - -static PyTypeObject lazymanifestEntriesIterator = { - PyVarObject_HEAD_INIT(NULL, 0) - "parsers.lazymanifest.entriesiterator", /*tp_name */ - sizeof(lmIter), /*tp_basicsize */ - 0, /*tp_itemsize */ - lmiter_dealloc, /*tp_dealloc */ - 0, /*tp_print */ - 0, /*tp_getattr */ - 0, /*tp_setattr */ - 0, /*tp_compare */ - 0, /*tp_repr */ - 0, /*tp_as_number */ - 0, /*tp_as_sequence */ - 0, /*tp_as_mapping */ - 0, /*tp_hash */ - 0, /*tp_call */ - 0, /*tp_str */ - 0, /*tp_getattro */ - 0, /*tp_setattro */ - 0, /*tp_as_buffer */ - LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */ - "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter: __iter__() method */ - lmiter_iterentriesnext, /* tp_iternext: next() method */ -}; - -static PyObject *lmiter_iterkeysnext(PyObject *o) -{ - size_t pl; - line *l = lmiter_nextline((lmIter *)o); - if (!l) { - return NULL; - } - pl = pathlen(l); - return PyBytes_FromStringAndSize(l->start, pl); -} - -#ifdef IS_PY3K -#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT -#else -#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \ - | Py_TPFLAGS_HAVE_ITER -#endif - -static PyTypeObject lazymanifestKeysIterator = { - PyVarObject_HEAD_INIT(NULL, 0) - "parsers.lazymanifest.keysiterator", /*tp_name */ - sizeof(lmIter), /*tp_basicsize */ - 0, /*tp_itemsize */ - lmiter_dealloc, /*tp_dealloc */ - 0, /*tp_print */ - 0, /*tp_getattr */ - 0, /*tp_setattr */ - 0, /*tp_compare */ - 0, /*tp_repr */ - 0, /*tp_as_number */ - 0, /*tp_as_sequence */ - 0, /*tp_as_mapping */ - 0, /*tp_hash */ - 0, /*tp_call */ - 0, /*tp_str */ - 0, /*tp_getattro */ - 0, /*tp_setattro */ - 0, /*tp_as_buffer */ - LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */ - "Keys iterator for a lazymanifest.", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter: __iter__() method */ - lmiter_iterkeysnext, /* tp_iternext: next() method */ -}; - -static lazymanifest *lazymanifest_copy(lazymanifest *self); - -static PyObject *lazymanifest_getentriesiter(lazymanifest *self) -{ - lmIter *i = NULL; - lazymanifest *t = lazymanifest_copy(self); - if (!t) { - PyErr_NoMemory(); - return NULL; - } - i = PyObject_New(lmIter, &lazymanifestEntriesIterator); - if (i) { - i->m = t; - i->pos = -1; - } else { - Py_DECREF(t); - PyErr_NoMemory(); - } - return (PyObject *)i; -} - -static PyObject *lazymanifest_getkeysiter(lazymanifest *self) -{ - lmIter *i = NULL; - lazymanifest *t = lazymanifest_copy(self); - if (!t) { - PyErr_NoMemory(); - return NULL; - } - i = PyObject_New(lmIter, &lazymanifestKeysIterator); - if (i) { - i->m = t; - i->pos = -1; - } else { - Py_DECREF(t); - PyErr_NoMemory(); - } - return (PyObject *)i; -} - -/* __getitem__ and __setitem__ support */ - -static Py_ssize_t lazymanifest_size(lazymanifest *self) -{ - return self->livelines; -} - -static int linecmp(const void *left, const void *right) -{ - return strcmp(((const line *)left)->start, - ((const line *)right)->start); -} - -static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key) -{ - line needle; - line *hit; - if (!PyBytes_Check(key)) { - PyErr_Format(PyExc_TypeError, - "getitem: manifest keys must be a string."); - return NULL; - } - needle.start = PyBytes_AsString(key); - hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), - &linecmp); - if (!hit || hit->deleted) { - PyErr_Format(PyExc_KeyError, "No such manifest entry."); - return NULL; - } - return hashflags(hit); -} - -static int lazymanifest_delitem(lazymanifest *self, PyObject *key) -{ - line needle; - line *hit; - if (!PyBytes_Check(key)) { - PyErr_Format(PyExc_TypeError, - "delitem: manifest keys must be a string."); - return -1; - } - needle.start = PyBytes_AsString(key); - hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), - &linecmp); - if (!hit || hit->deleted) { - PyErr_Format(PyExc_KeyError, - "Tried to delete nonexistent manifest entry."); - return -1; - } - self->dirty = true; - hit->deleted = true; - self->livelines--; - return 0; -} - -/* Do a binary search for the insertion point for new, creating the - * new entry if needed. */ -static int internalsetitem(lazymanifest *self, line *new) { - int start = 0, end = self->numlines; - while (start < end) { - int pos = start + (end - start) / 2; - int c = linecmp(new, self->lines + pos); - if (c < 0) - end = pos; - else if (c > 0) - start = pos + 1; - else { - if (self->lines[pos].deleted) - self->livelines++; - if (self->lines[pos].from_malloc) - free(self->lines[pos].start); - start = pos; - goto finish; - } - } - /* being here means we need to do an insert */ - if (!realloc_if_full(self)) { - PyErr_NoMemory(); - return -1; - } - memmove(self->lines + start + 1, self->lines + start, - (self->numlines - start) * sizeof(line)); - self->numlines++; - self->livelines++; -finish: - self->lines[start] = *new; - self->dirty = true; - return 0; -} - -static int lazymanifest_setitem( - lazymanifest *self, PyObject *key, PyObject *value) -{ - char *path; - Py_ssize_t plen; - PyObject *pyhash; - Py_ssize_t hlen; - char *hash; - PyObject *pyflags; - char *flags; - Py_ssize_t flen; - size_t dlen; - char *dest; - int i; - line new; - if (!PyBytes_Check(key)) { - PyErr_Format(PyExc_TypeError, - "setitem: manifest keys must be a string."); - return -1; - } - if (!value) { - return lazymanifest_delitem(self, key); - } - if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) { - PyErr_Format(PyExc_TypeError, - "Manifest values must be a tuple of (node, flags)."); - return -1; - } - if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) { - return -1; - } - - pyhash = PyTuple_GetItem(value, 0); - if (!PyBytes_Check(pyhash)) { - PyErr_Format(PyExc_TypeError, - "node must be a 20-byte string"); - return -1; - } - hlen = PyBytes_Size(pyhash); - /* Some parts of the codebase try and set 21 or 22 - * byte "hash" values in order to perturb things for - * status. We have to preserve at least the 21st - * byte. Sigh. If there's a 22nd byte, we drop it on - * the floor, which works fine. - */ - if (hlen != 20 && hlen != 21 && hlen != 22) { - PyErr_Format(PyExc_TypeError, - "node must be a 20-byte string"); - return -1; - } - hash = PyBytes_AsString(pyhash); - - pyflags = PyTuple_GetItem(value, 1); - if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) { - PyErr_Format(PyExc_TypeError, - "flags must a 0 or 1 byte string"); - return -1; - } - if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) { - return -1; - } - /* one null byte and one newline */ - dlen = plen + 41 + flen + 1; - dest = malloc(dlen); - if (!dest) { - PyErr_NoMemory(); - return -1; - } - memcpy(dest, path, plen + 1); - for (i = 0; i < 20; i++) { - /* Cast to unsigned, so it will not get sign-extended when promoted - * to int (as is done when passing to a variadic function) - */ - sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]); - } - memcpy(dest + plen + 41, flags, flen); - dest[plen + 41 + flen] = '\n'; - new.start = dest; - new.len = dlen; - new.hash_suffix = '\0'; - if (hlen > 20) { - new.hash_suffix = hash[20]; - } - new.from_malloc = true; /* is `start` a pointer we allocated? */ - new.deleted = false; /* is this entry deleted? */ - if (internalsetitem(self, &new)) { - return -1; - } - return 0; -} - -static PyMappingMethods lazymanifest_mapping_methods = { - (lenfunc)lazymanifest_size, /* mp_length */ - (binaryfunc)lazymanifest_getitem, /* mp_subscript */ - (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */ -}; - -/* sequence methods (important or __contains__ builds an iterator) */ - -static int lazymanifest_contains(lazymanifest *self, PyObject *key) -{ - line needle; - line *hit; - if (!PyBytes_Check(key)) { - /* Our keys are always strings, so if the contains - * check is for a non-string, just return false. */ - return 0; - } - needle.start = PyBytes_AsString(key); - hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), - &linecmp); - if (!hit || hit->deleted) { - return 0; - } - return 1; -} - -static PySequenceMethods lazymanifest_seq_meths = { - (lenfunc)lazymanifest_size, /* sq_length */ - 0, /* sq_concat */ - 0, /* sq_repeat */ - 0, /* sq_item */ - 0, /* sq_slice */ - 0, /* sq_ass_item */ - 0, /* sq_ass_slice */ - (objobjproc)lazymanifest_contains, /* sq_contains */ - 0, /* sq_inplace_concat */ - 0, /* sq_inplace_repeat */ -}; - - -/* Other methods (copy, diff, etc) */ -static PyTypeObject lazymanifestType; - -/* If the manifest has changes, build the new manifest text and reindex it. */ -static int compact(lazymanifest *self) { - int i; - ssize_t need = 0; - char *data; - line *src, *dst; - PyObject *pydata; - if (!self->dirty) - return 0; - for (i = 0; i < self->numlines; i++) { - if (!self->lines[i].deleted) { - need += self->lines[i].len; - } - } - pydata = PyBytes_FromStringAndSize(NULL, need); - if (!pydata) - return -1; - data = PyBytes_AsString(pydata); - if (!data) { - return -1; - } - src = self->lines; - dst = self->lines; - for (i = 0; i < self->numlines; i++, src++) { - char *tofree = NULL; - if (src->from_malloc) { - tofree = src->start; - } - if (!src->deleted) { - memcpy(data, src->start, src->len); - *dst = *src; - dst->start = data; - dst->from_malloc = false; - data += dst->len; - dst++; - } - free(tofree); - } - Py_DECREF(self->pydata); - self->pydata = pydata; - self->numlines = self->livelines; - self->dirty = false; - return 0; -} - -static PyObject *lazymanifest_text(lazymanifest *self) -{ - if (compact(self) != 0) { - PyErr_NoMemory(); - return NULL; - } - Py_INCREF(self->pydata); - return self->pydata; -} - -static lazymanifest *lazymanifest_copy(lazymanifest *self) -{ - lazymanifest *copy = NULL; - if (compact(self) != 0) { - goto nomem; - } - copy = PyObject_New(lazymanifest, &lazymanifestType); - if (!copy) { - goto nomem; - } - copy->numlines = self->numlines; - copy->livelines = self->livelines; - copy->dirty = false; - copy->lines = malloc(self->maxlines *sizeof(line)); - if (!copy->lines) { - goto nomem; - } - memcpy(copy->lines, self->lines, self->numlines * sizeof(line)); - copy->maxlines = self->maxlines; - copy->pydata = self->pydata; - Py_INCREF(copy->pydata); - return copy; -nomem: - PyErr_NoMemory(); - Py_XDECREF(copy); - return NULL; -} - -static lazymanifest *lazymanifest_filtercopy( - lazymanifest *self, PyObject *matchfn) -{ - lazymanifest *copy = NULL; - int i; - if (!PyCallable_Check(matchfn)) { - PyErr_SetString(PyExc_TypeError, "matchfn must be callable"); - return NULL; - } - /* compact ourselves first to avoid double-frees later when we - * compact tmp so that it doesn't have random pointers to our - * underlying from_malloc-data (self->pydata is safe) */ - if (compact(self) != 0) { - goto nomem; - } - copy = PyObject_New(lazymanifest, &lazymanifestType); - if (!copy) { - goto nomem; - } - copy->dirty = true; - copy->lines = malloc(self->maxlines * sizeof(line)); - if (!copy->lines) { - goto nomem; - } - copy->maxlines = self->maxlines; - copy->numlines = 0; - copy->pydata = self->pydata; - Py_INCREF(self->pydata); - for (i = 0; i < self->numlines; i++) { - PyObject *arglist = NULL, *result = NULL; - arglist = Py_BuildValue("(s)", self->lines[i].start); - if (!arglist) { - return NULL; - } - result = PyObject_CallObject(matchfn, arglist); - Py_DECREF(arglist); - /* if the callback raised an exception, just let it - * through and give up */ - if (!result) { - free(copy->lines); - Py_DECREF(self->pydata); - return NULL; - } - if (PyObject_IsTrue(result)) { - assert(!(self->lines[i].from_malloc)); - copy->lines[copy->numlines++] = self->lines[i]; - } - Py_DECREF(result); - } - copy->livelines = copy->numlines; - return copy; -nomem: - PyErr_NoMemory(); - Py_XDECREF(copy); - return NULL; -} - -static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args) -{ - lazymanifest *other; - PyObject *pyclean = NULL; - bool listclean; - PyObject *emptyTup = NULL, *ret = NULL; - PyObject *es; - int sneedle = 0, oneedle = 0; - if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) { - return NULL; - } - listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean); - es = PyBytes_FromString(""); - if (!es) { - goto nomem; - } - emptyTup = PyTuple_Pack(2, Py_None, es); - Py_DECREF(es); - if (!emptyTup) { - goto nomem; - } - ret = PyDict_New(); - if (!ret) { - goto nomem; - } - while (sneedle != self->numlines || oneedle != other->numlines) { - line *left = self->lines + sneedle; - line *right = other->lines + oneedle; - int result; - PyObject *key; - PyObject *outer; - /* If we're looking at a deleted entry and it's not - * the end of the manifest, just skip it. */ - if (left->deleted && sneedle < self->numlines) { - sneedle++; - continue; - } - if (right->deleted && oneedle < other->numlines) { - oneedle++; - continue; - } - /* if we're at the end of either manifest, then we - * know the remaining items are adds so we can skip - * the strcmp. */ - if (sneedle == self->numlines) { - result = 1; - } else if (oneedle == other->numlines) { - result = -1; - } else { - result = linecmp(left, right); - } - key = result <= 0 ? - PyBytes_FromString(left->start) : - PyBytes_FromString(right->start); - if (!key) - goto nomem; - if (result < 0) { - PyObject *l = hashflags(left); - if (!l) { - goto nomem; - } - outer = PyTuple_Pack(2, l, emptyTup); - Py_DECREF(l); - if (!outer) { - goto nomem; - } - PyDict_SetItem(ret, key, outer); - Py_DECREF(outer); - sneedle++; - } else if (result > 0) { - PyObject *r = hashflags(right); - if (!r) { - goto nomem; - } - outer = PyTuple_Pack(2, emptyTup, r); - Py_DECREF(r); - if (!outer) { - goto nomem; - } - PyDict_SetItem(ret, key, outer); - Py_DECREF(outer); - oneedle++; - } else { - /* file exists in both manifests */ - if (left->len != right->len - || memcmp(left->start, right->start, left->len) - || left->hash_suffix != right->hash_suffix) { - PyObject *l = hashflags(left); - PyObject *r; - if (!l) { - goto nomem; - } - r = hashflags(right); - if (!r) { - Py_DECREF(l); - goto nomem; - } - outer = PyTuple_Pack(2, l, r); - Py_DECREF(l); - Py_DECREF(r); - if (!outer) { - goto nomem; - } - PyDict_SetItem(ret, key, outer); - Py_DECREF(outer); - } else if (listclean) { - PyDict_SetItem(ret, key, Py_None); - } - sneedle++; - oneedle++; - } - Py_DECREF(key); - } - Py_DECREF(emptyTup); - return ret; -nomem: - PyErr_NoMemory(); - Py_XDECREF(ret); - Py_XDECREF(emptyTup); - return NULL; -} - -static PyMethodDef lazymanifest_methods[] = { - {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS, - "Iterate over file names in this lazymanifest."}, - {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS, - "Iterate over (path, nodeid, flags) tuples in this lazymanifest."}, - {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS, - "Make a copy of this lazymanifest."}, - {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O, - "Make a copy of this manifest filtered by matchfn."}, - {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS, - "Compare this lazymanifest to another one."}, - {"text", (PyCFunction)lazymanifest_text, METH_NOARGS, - "Encode this manifest to text."}, - {NULL}, -}; - -#ifdef IS_PY3K -#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT -#else -#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN -#endif - -static PyTypeObject lazymanifestType = { - PyVarObject_HEAD_INIT(NULL, 0) - "parsers.lazymanifest", /* tp_name */ - sizeof(lazymanifest), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)lazymanifest_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - &lazymanifest_seq_meths, /* tp_as_sequence */ - &lazymanifest_mapping_methods, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - LAZYMANIFEST_TPFLAGS, /* tp_flags */ - "TODO(augie)", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */ - 0, /* tp_iternext */ - lazymanifest_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)lazymanifest_init, /* tp_init */ - 0, /* tp_alloc */ -}; - -void manifest_module_init(PyObject * mod) -{ - lazymanifestType.tp_new = PyType_GenericNew; - if (PyType_Ready(&lazymanifestType) < 0) - return; - Py_INCREF(&lazymanifestType); - - PyModule_AddObject(mod, "lazymanifest", - (PyObject *)&lazymanifestType); -}
--- a/mercurial/manifest.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/manifest.py Sat Aug 13 12:23:56 2016 +0900 @@ -19,11 +19,12 @@ from . import ( error, mdiff, - parsers, + policy, revlog, util, ) +parsers = policy.importmod(r'parsers') propertycache = util.propertycache def _parsev1(data):
--- a/mercurial/obsolete.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/obsolete.py Sat Aug 13 12:23:56 2016 +0900 @@ -76,11 +76,13 @@ from . import ( error, node, - parsers, phases, + policy, util, ) +parsers = policy.importmod(r'parsers') + _pack = struct.pack _unpack = struct.unpack _calcsize = struct.calcsize
--- a/mercurial/parsers.c Sat Aug 13 12:18:58 2016 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2947 +0,0 @@ -/* - parsers.c - efficient content parsing - - Copyright 2008 Matt Mackall <mpm@selenic.com> and others - - This software may be used and distributed according to the terms of - the GNU General Public License, incorporated herein by reference. -*/ - -#include <Python.h> -#include <ctype.h> -#include <stddef.h> -#include <string.h> - -#include "util.h" -#include "bitmanipulation.h" - -#ifdef IS_PY3K -/* The mapping of Python types is meant to be temporary to get Python - * 3 to compile. We should remove this once Python 3 support is fully - * supported and proper types are used in the extensions themselves. */ -#define PyInt_Type PyLong_Type -#define PyInt_Check PyLong_Check -#define PyInt_FromLong PyLong_FromLong -#define PyInt_FromSsize_t PyLong_FromSsize_t -#define PyInt_AS_LONG PyLong_AS_LONG -#define PyInt_AsLong PyLong_AsLong -#endif - -static char *versionerrortext = "Python minor version mismatch"; - -static int8_t hextable[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */ - -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}; - -static char lowertable[128] = { - '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', - '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', - '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', - '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', - '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', - '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', - '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', - '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', - '\x40', - '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ - '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ - '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ - '\x78', '\x79', '\x7a', /* X-Z */ - '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', - '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', - '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', - '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', - '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' -}; - -static char uppertable[128] = { - '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', - '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', - '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', - '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', - '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', - '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', - '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', - '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', - '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', - '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', - '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', - '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', - '\x60', - '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ - '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ - '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ - '\x58', '\x59', '\x5a', /* x-z */ - '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' -}; - -static inline int hexdigit(const char *p, Py_ssize_t off) -{ - int8_t val = hextable[(unsigned char)p[off]]; - - if (val >= 0) { - return val; - } - - PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); - return 0; -} - -/* - * Turn a hex-encoded string into binary. - */ -PyObject *unhexlify(const char *str, int len) -{ - PyObject *ret; - char *d; - int i; - - ret = PyBytes_FromStringAndSize(NULL, len / 2); - - if (!ret) - return NULL; - - d = PyBytes_AsString(ret); - - for (i = 0; i < len;) { - int hi = hexdigit(str, i++); - int lo = hexdigit(str, i++); - *d++ = (hi << 4) | lo; - } - - return ret; -} - -static inline PyObject *_asciitransform(PyObject *str_obj, - const char table[128], - PyObject *fallback_fn) -{ - char *str, *newstr; - Py_ssize_t i, len; - PyObject *newobj = NULL; - PyObject *ret = NULL; - - str = PyBytes_AS_STRING(str_obj); - len = PyBytes_GET_SIZE(str_obj); - - newobj = PyBytes_FromStringAndSize(NULL, len); - if (!newobj) - goto quit; - - newstr = PyBytes_AS_STRING(newobj); - - for (i = 0; i < len; i++) { - char c = str[i]; - if (c & 0x80) { - if (fallback_fn != NULL) { - ret = PyObject_CallFunctionObjArgs(fallback_fn, - str_obj, NULL); - } else { - PyObject *err = PyUnicodeDecodeError_Create( - "ascii", str, len, i, (i + 1), - "unexpected code byte"); - PyErr_SetObject(PyExc_UnicodeDecodeError, err); - Py_XDECREF(err); - } - goto quit; - } - newstr[i] = table[(unsigned char)c]; - } - - ret = newobj; - Py_INCREF(ret); -quit: - Py_XDECREF(newobj); - return ret; -} - -static PyObject *asciilower(PyObject *self, PyObject *args) -{ - PyObject *str_obj; - if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) - return NULL; - return _asciitransform(str_obj, lowertable, NULL); -} - -static PyObject *asciiupper(PyObject *self, PyObject *args) -{ - PyObject *str_obj; - if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) - return NULL; - return _asciitransform(str_obj, uppertable, NULL); -} - -static inline PyObject *_dict_new_presized(Py_ssize_t expected_size) -{ - /* _PyDict_NewPresized expects a minused parameter, but it actually - creates a dictionary that's the nearest power of two bigger than the - parameter. For example, with the initial minused = 1000, the - dictionary created has size 1024. Of course in a lot of cases that - can be greater than the maximum load factor Python's dict object - expects (= 2/3), so as soon as we cross the threshold we'll resize - anyway. So create a dictionary that's at least 3/2 the size. */ - return _PyDict_NewPresized(((1 + expected_size) / 2) * 3); -} - -static PyObject *dict_new_presized(PyObject *self, PyObject *args) -{ - Py_ssize_t expected_size; - - if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) - return NULL; - - return _dict_new_presized(expected_size); -} - -static PyObject *make_file_foldmap(PyObject *self, PyObject *args) -{ - PyObject *dmap, *spec_obj, *normcase_fallback; - PyObject *file_foldmap = NULL; - enum normcase_spec spec; - PyObject *k, *v; - dirstateTupleObject *tuple; - Py_ssize_t pos = 0; - const char *table; - - if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", - &PyDict_Type, &dmap, - &PyInt_Type, &spec_obj, - &PyFunction_Type, &normcase_fallback)) - goto quit; - - spec = (int)PyInt_AS_LONG(spec_obj); - switch (spec) { - case NORMCASE_LOWER: - table = lowertable; - break; - case NORMCASE_UPPER: - table = uppertable; - break; - case NORMCASE_OTHER: - table = NULL; - break; - default: - PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); - goto quit; - } - - /* Add some more entries to deal with additions outside this - function. */ - file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); - if (file_foldmap == NULL) - goto quit; - - while (PyDict_Next(dmap, &pos, &k, &v)) { - if (!dirstate_tuple_check(v)) { - PyErr_SetString(PyExc_TypeError, - "expected a dirstate tuple"); - goto quit; - } - - tuple = (dirstateTupleObject *)v; - if (tuple->state != 'r') { - PyObject *normed; - if (table != NULL) { - normed = _asciitransform(k, table, - normcase_fallback); - } else { - normed = PyObject_CallFunctionObjArgs( - normcase_fallback, k, NULL); - } - - if (normed == NULL) - goto quit; - if (PyDict_SetItem(file_foldmap, normed, k) == -1) { - Py_DECREF(normed); - goto quit; - } - Py_DECREF(normed); - } - } - return file_foldmap; -quit: - Py_XDECREF(file_foldmap); - return NULL; -} - -/* - * This code assumes that a manifest is stitched together with newline - * ('\n') characters. - */ -static PyObject *parse_manifest(PyObject *self, PyObject *args) -{ - PyObject *mfdict, *fdict; - char *str, *start, *end; - int len; - - if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", - &PyDict_Type, &mfdict, - &PyDict_Type, &fdict, - &str, &len)) - goto quit; - - start = str; - end = str + len; - while (start < end) { - PyObject *file = NULL, *node = NULL; - PyObject *flags = NULL; - char *zero = NULL, *newline = NULL; - ptrdiff_t nlen; - - zero = memchr(start, '\0', end - start); - if (!zero) { - PyErr_SetString(PyExc_ValueError, - "manifest entry has no separator"); - goto quit; - } - - newline = memchr(zero + 1, '\n', end - (zero + 1)); - if (!newline) { - PyErr_SetString(PyExc_ValueError, - "manifest contains trailing garbage"); - goto quit; - } - - file = PyBytes_FromStringAndSize(start, zero - start); - - if (!file) - goto bail; - - nlen = newline - zero - 1; - - node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen); - if (!node) - goto bail; - - if (nlen > 40) { - flags = PyBytes_FromStringAndSize(zero + 41, - nlen - 40); - if (!flags) - goto bail; - - if (PyDict_SetItem(fdict, file, flags) == -1) - goto bail; - } - - if (PyDict_SetItem(mfdict, file, node) == -1) - goto bail; - - start = newline + 1; - - Py_XDECREF(flags); - Py_XDECREF(node); - Py_XDECREF(file); - continue; - bail: - Py_XDECREF(flags); - Py_XDECREF(node); - Py_XDECREF(file); - goto quit; - } - - Py_INCREF(Py_None); - return Py_None; -quit: - return NULL; -} - -static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode, - int size, int mtime) -{ - dirstateTupleObject *t = PyObject_New(dirstateTupleObject, - &dirstateTupleType); - if (!t) - return NULL; - t->state = state; - t->mode = mode; - t->size = size; - t->mtime = mtime; - return t; -} - -static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args, - PyObject *kwds) -{ - /* We do all the initialization here and not a tp_init function because - * dirstate_tuple is immutable. */ - dirstateTupleObject *t; - char state; - int size, mode, mtime; - if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) - return NULL; - - t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1); - if (!t) - return NULL; - t->state = state; - t->mode = mode; - t->size = size; - t->mtime = mtime; - - return (PyObject *)t; -} - -static void dirstate_tuple_dealloc(PyObject *o) -{ - PyObject_Del(o); -} - -static Py_ssize_t dirstate_tuple_length(PyObject *o) -{ - return 4; -} - -static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i) -{ - dirstateTupleObject *t = (dirstateTupleObject *)o; - switch (i) { - case 0: - return PyBytes_FromStringAndSize(&t->state, 1); - case 1: - return PyInt_FromLong(t->mode); - case 2: - return PyInt_FromLong(t->size); - case 3: - return PyInt_FromLong(t->mtime); - default: - PyErr_SetString(PyExc_IndexError, "index out of range"); - return NULL; - } -} - -static PySequenceMethods dirstate_tuple_sq = { - dirstate_tuple_length, /* sq_length */ - 0, /* sq_concat */ - 0, /* sq_repeat */ - dirstate_tuple_item, /* sq_item */ - 0, /* sq_ass_item */ - 0, /* sq_contains */ - 0, /* sq_inplace_concat */ - 0 /* sq_inplace_repeat */ -}; - -PyTypeObject dirstateTupleType = { - PyVarObject_HEAD_INIT(NULL, 0) - "dirstate_tuple", /* tp_name */ - sizeof(dirstateTupleObject),/* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)dirstate_tuple_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - &dirstate_tuple_sq, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - "dirstate tuple", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - dirstate_tuple_new, /* tp_new */ -}; - -static PyObject *parse_dirstate(PyObject *self, PyObject *args) -{ - PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; - PyObject *fname = NULL, *cname = NULL, *entry = NULL; - char state, *cur, *str, *cpos; - int mode, size, mtime; - unsigned int flen, len, pos = 40; - int readlen; - - if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", - &PyDict_Type, &dmap, - &PyDict_Type, &cmap, - &str, &readlen)) - goto quit; - - len = readlen; - - /* read parents */ - if (len < 40) { - PyErr_SetString( - PyExc_ValueError, "too little data for parents"); - goto quit; - } - - parents = Py_BuildValue("s#s#", str, 20, str + 20, 20); - if (!parents) - goto quit; - - /* read filenames */ - while (pos >= 40 && pos < len) { - if (pos + 17 > len) { - PyErr_SetString(PyExc_ValueError, - "overflow in dirstate"); - goto quit; - } - cur = str + pos; - /* unpack header */ - state = *cur; - mode = getbe32(cur + 1); - size = getbe32(cur + 5); - mtime = getbe32(cur + 9); - flen = getbe32(cur + 13); - pos += 17; - cur += 17; - if (flen > len - pos) { - PyErr_SetString(PyExc_ValueError, "overflow in dirstate"); - goto quit; - } - - entry = (PyObject *)make_dirstate_tuple(state, mode, size, - mtime); - cpos = memchr(cur, 0, flen); - if (cpos) { - fname = PyBytes_FromStringAndSize(cur, cpos - cur); - cname = PyBytes_FromStringAndSize(cpos + 1, - flen - (cpos - cur) - 1); - if (!fname || !cname || - PyDict_SetItem(cmap, fname, cname) == -1 || - PyDict_SetItem(dmap, fname, entry) == -1) - goto quit; - Py_DECREF(cname); - } else { - fname = PyBytes_FromStringAndSize(cur, flen); - if (!fname || - PyDict_SetItem(dmap, fname, entry) == -1) - goto quit; - } - Py_DECREF(fname); - Py_DECREF(entry); - fname = cname = entry = NULL; - pos += flen; - } - - ret = parents; - Py_INCREF(ret); -quit: - Py_XDECREF(fname); - Py_XDECREF(cname); - Py_XDECREF(entry); - Py_XDECREF(parents); - return ret; -} - -/* - * Build a set of non-normal and other parent entries from the dirstate dmap -*/ -static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) { - PyObject *dmap, *fname, *v; - PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL; - Py_ssize_t pos; - - if (!PyArg_ParseTuple(args, "O!:nonnormalentries", - &PyDict_Type, &dmap)) - goto bail; - - nonnset = PySet_New(NULL); - if (nonnset == NULL) - goto bail; - - otherpset = PySet_New(NULL); - if (otherpset == NULL) - goto bail; - - pos = 0; - while (PyDict_Next(dmap, &pos, &fname, &v)) { - dirstateTupleObject *t; - if (!dirstate_tuple_check(v)) { - PyErr_SetString(PyExc_TypeError, - "expected a dirstate tuple"); - goto bail; - } - t = (dirstateTupleObject *)v; - - if (t->state == 'n' && t->size == -2) { - if (PySet_Add(otherpset, fname) == -1) { - goto bail; - } - } - - if (t->state == 'n' && t->mtime != -1) - continue; - if (PySet_Add(nonnset, fname) == -1) - goto bail; - } - - result = Py_BuildValue("(OO)", nonnset, otherpset); - if (result == NULL) - goto bail; - Py_DECREF(nonnset); - Py_DECREF(otherpset); - return result; -bail: - Py_XDECREF(nonnset); - Py_XDECREF(otherpset); - Py_XDECREF(result); - return NULL; -} - -/* - * Efficiently pack a dirstate object into its on-disk format. - */ -static PyObject *pack_dirstate(PyObject *self, PyObject *args) -{ - PyObject *packobj = NULL; - PyObject *map, *copymap, *pl, *mtime_unset = NULL; - Py_ssize_t nbytes, pos, l; - PyObject *k, *v = NULL, *pn; - char *p, *s; - int now; - - if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", - &PyDict_Type, &map, &PyDict_Type, ©map, - &pl, &now)) - return NULL; - - if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) { - PyErr_SetString(PyExc_TypeError, "expected 2-element sequence"); - return NULL; - } - - /* Figure out how much we need to allocate. */ - for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) { - PyObject *c; - if (!PyBytes_Check(k)) { - PyErr_SetString(PyExc_TypeError, "expected string key"); - goto bail; - } - nbytes += PyBytes_GET_SIZE(k) + 17; - c = PyDict_GetItem(copymap, k); - if (c) { - if (!PyBytes_Check(c)) { - PyErr_SetString(PyExc_TypeError, - "expected string key"); - goto bail; - } - nbytes += PyBytes_GET_SIZE(c) + 1; - } - } - - packobj = PyBytes_FromStringAndSize(NULL, nbytes); - if (packobj == NULL) - goto bail; - - p = PyBytes_AS_STRING(packobj); - - pn = PySequence_ITEM(pl, 0); - if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { - PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); - goto bail; - } - memcpy(p, s, l); - p += 20; - pn = PySequence_ITEM(pl, 1); - if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { - PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); - goto bail; - } - memcpy(p, s, l); - p += 20; - - for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) { - dirstateTupleObject *tuple; - char state; - int mode, size, mtime; - Py_ssize_t len, l; - PyObject *o; - char *t; - - if (!dirstate_tuple_check(v)) { - PyErr_SetString(PyExc_TypeError, - "expected a dirstate tuple"); - goto bail; - } - tuple = (dirstateTupleObject *)v; - - state = tuple->state; - mode = tuple->mode; - size = tuple->size; - mtime = tuple->mtime; - if (state == 'n' && mtime == now) { - /* See pure/parsers.py:pack_dirstate for why we do - * this. */ - mtime = -1; - mtime_unset = (PyObject *)make_dirstate_tuple( - state, mode, size, mtime); - if (!mtime_unset) - goto bail; - if (PyDict_SetItem(map, k, mtime_unset) == -1) - goto bail; - Py_DECREF(mtime_unset); - mtime_unset = NULL; - } - *p++ = state; - putbe32((uint32_t)mode, p); - putbe32((uint32_t)size, p + 4); - putbe32((uint32_t)mtime, p + 8); - t = p + 12; - p += 16; - len = PyBytes_GET_SIZE(k); - memcpy(p, PyBytes_AS_STRING(k), len); - p += len; - o = PyDict_GetItem(copymap, k); - if (o) { - *p++ = '\0'; - l = PyBytes_GET_SIZE(o); - memcpy(p, PyBytes_AS_STRING(o), l); - p += l; - len += l + 1; - } - putbe32((uint32_t)len, t); - } - - pos = p - PyBytes_AS_STRING(packobj); - if (pos != nbytes) { - PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld", - (long)pos, (long)nbytes); - goto bail; - } - - return packobj; -bail: - Py_XDECREF(mtime_unset); - Py_XDECREF(packobj); - Py_XDECREF(v); - return NULL; -} - -/* - * A base-16 trie for fast node->rev mapping. - * - * Positive value is index of the next node in the trie - * Negative value is a leaf: -(rev + 1) - * Zero is empty - */ -typedef struct { - int children[16]; -} nodetree; - -/* - * This class has two behaviors. - * - * When used in a list-like way (with integer keys), we decode an - * entry in a RevlogNG index file on demand. Our last entry is a - * sentinel, always a nullid. We have limited support for - * integer-keyed insert and delete, only at elements right before the - * sentinel. - * - * With string keys, we lazily perform a reverse mapping from node to - * rev, using a base-16 trie. - */ -typedef struct { - PyObject_HEAD - /* Type-specific fields go here. */ - PyObject *data; /* raw bytes of index */ - Py_buffer buf; /* buffer of data */ - PyObject **cache; /* cached tuples */ - const char **offsets; /* populated on demand */ - Py_ssize_t raw_length; /* original number of elements */ - Py_ssize_t length; /* current number of elements */ - PyObject *added; /* populated on demand */ - PyObject *headrevs; /* cache, invalidated on changes */ - PyObject *filteredrevs;/* filtered revs set */ - nodetree *nt; /* base-16 trie */ - unsigned ntlength; /* # nodes in use */ - unsigned ntcapacity; /* # nodes allocated */ - int ntdepth; /* maximum depth of tree */ - int ntsplits; /* # splits performed */ - int ntrev; /* last rev scanned */ - int ntlookups; /* # lookups */ - int ntmisses; /* # lookups that miss the cache */ - int inlined; -} indexObject; - -static Py_ssize_t index_length(const indexObject *self) -{ - if (self->added == NULL) - return self->length; - return self->length + PyList_GET_SIZE(self->added); -} - -static PyObject *nullentry; -static const char nullid[20]; - -static Py_ssize_t inline_scan(indexObject *self, const char **offsets); - -#if LONG_MAX == 0x7fffffffL -static char *tuple_format = "Kiiiiiis#"; -#else -static char *tuple_format = "kiiiiiis#"; -#endif - -/* A RevlogNG v1 index entry is 64 bytes long. */ -static const long v1_hdrsize = 64; - -/* - * Return a pointer to the beginning of a RevlogNG record. - */ -static const char *index_deref(indexObject *self, Py_ssize_t pos) -{ - if (self->inlined && pos > 0) { - if (self->offsets == NULL) { - self->offsets = PyMem_Malloc(self->raw_length * - sizeof(*self->offsets)); - if (self->offsets == NULL) - return (const char *)PyErr_NoMemory(); - inline_scan(self, self->offsets); - } - return self->offsets[pos]; - } - - return (const char *)(self->buf.buf) + pos * v1_hdrsize; -} - -static inline int index_get_parents(indexObject *self, Py_ssize_t rev, - int *ps, int maxrev) -{ - if (rev >= self->length - 1) { - PyObject *tuple = PyList_GET_ITEM(self->added, - rev - self->length + 1); - ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5)); - ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6)); - } else { - const char *data = index_deref(self, rev); - ps[0] = getbe32(data + 24); - ps[1] = getbe32(data + 28); - } - /* If index file is corrupted, ps[] may point to invalid revisions. So - * there is a risk of buffer overflow to trust them unconditionally. */ - if (ps[0] > maxrev || ps[1] > maxrev) { - PyErr_SetString(PyExc_ValueError, "parent out of range"); - return -1; - } - return 0; -} - - -/* - * RevlogNG format (all in big endian, data may be inlined): - * 6 bytes: offset - * 2 bytes: flags - * 4 bytes: compressed length - * 4 bytes: uncompressed length - * 4 bytes: base revision - * 4 bytes: link revision - * 4 bytes: parent 1 revision - * 4 bytes: parent 2 revision - * 32 bytes: nodeid (only 20 bytes used) - */ -static PyObject *index_get(indexObject *self, Py_ssize_t pos) -{ - uint64_t offset_flags; - int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; - const char *c_node_id; - const char *data; - Py_ssize_t length = index_length(self); - PyObject *entry; - - if (pos < 0) - pos += length; - - if (pos < 0 || pos >= length) { - PyErr_SetString(PyExc_IndexError, "revlog index out of range"); - return NULL; - } - - if (pos == length - 1) { - Py_INCREF(nullentry); - return nullentry; - } - - if (pos >= self->length - 1) { - PyObject *obj; - obj = PyList_GET_ITEM(self->added, pos - self->length + 1); - Py_INCREF(obj); - return obj; - } - - if (self->cache) { - if (self->cache[pos]) { - Py_INCREF(self->cache[pos]); - return self->cache[pos]; - } - } else { - self->cache = calloc(self->raw_length, sizeof(PyObject *)); - if (self->cache == NULL) - return PyErr_NoMemory(); - } - - data = index_deref(self, pos); - if (data == NULL) - return NULL; - - offset_flags = getbe32(data + 4); - if (pos == 0) /* mask out version number for the first entry */ - offset_flags &= 0xFFFF; - else { - uint32_t offset_high = getbe32(data); - offset_flags |= ((uint64_t)offset_high) << 32; - } - - comp_len = getbe32(data + 8); - uncomp_len = getbe32(data + 12); - base_rev = getbe32(data + 16); - link_rev = getbe32(data + 20); - parent_1 = getbe32(data + 24); - parent_2 = getbe32(data + 28); - c_node_id = data + 32; - - entry = Py_BuildValue(tuple_format, offset_flags, comp_len, - uncomp_len, base_rev, link_rev, - parent_1, parent_2, c_node_id, 20); - - if (entry) { - PyObject_GC_UnTrack(entry); - Py_INCREF(entry); - } - - self->cache[pos] = entry; - - return entry; -} - -/* - * Return the 20-byte SHA of the node corresponding to the given rev. - */ -static const char *index_node(indexObject *self, Py_ssize_t pos) -{ - Py_ssize_t length = index_length(self); - const char *data; - - if (pos == length - 1 || pos == INT_MAX) - return nullid; - - if (pos >= length) - return NULL; - - if (pos >= self->length - 1) { - PyObject *tuple, *str; - tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); - str = PyTuple_GetItem(tuple, 7); - return str ? PyBytes_AS_STRING(str) : NULL; - } - - data = index_deref(self, pos); - return data ? data + 32 : NULL; -} - -static int nt_insert(indexObject *self, const char *node, int rev); - -static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen) -{ - if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1) - return -1; - if (*nodelen == 20) - return 0; - PyErr_SetString(PyExc_ValueError, "20-byte hash required"); - return -1; -} - -static PyObject *index_insert(indexObject *self, PyObject *args) -{ - PyObject *obj; - char *node; - int index; - Py_ssize_t len, nodelen; - - if (!PyArg_ParseTuple(args, "iO", &index, &obj)) - return NULL; - - if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { - PyErr_SetString(PyExc_TypeError, "8-tuple required"); - return NULL; - } - - if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1) - return NULL; - - len = index_length(self); - - if (index < 0) - index += len; - - if (index != len - 1) { - PyErr_SetString(PyExc_IndexError, - "insert only supported at index -1"); - return NULL; - } - - if (self->added == NULL) { - self->added = PyList_New(0); - if (self->added == NULL) - return NULL; - } - - if (PyList_Append(self->added, obj) == -1) - return NULL; - - if (self->nt) - nt_insert(self, node, index); - - Py_CLEAR(self->headrevs); - Py_RETURN_NONE; -} - -static void _index_clearcaches(indexObject *self) -{ - if (self->cache) { - Py_ssize_t i; - - for (i = 0; i < self->raw_length; i++) - Py_CLEAR(self->cache[i]); - free(self->cache); - self->cache = NULL; - } - if (self->offsets) { - PyMem_Free(self->offsets); - self->offsets = NULL; - } - if (self->nt) { - free(self->nt); - self->nt = NULL; - } - Py_CLEAR(self->headrevs); -} - -static PyObject *index_clearcaches(indexObject *self) -{ - _index_clearcaches(self); - self->ntlength = self->ntcapacity = 0; - self->ntdepth = self->ntsplits = 0; - self->ntrev = -1; - self->ntlookups = self->ntmisses = 0; - Py_RETURN_NONE; -} - -static PyObject *index_stats(indexObject *self) -{ - PyObject *obj = PyDict_New(); - PyObject *t = NULL; - - if (obj == NULL) - return NULL; - -#define istat(__n, __d) \ - do { \ - t = PyInt_FromSsize_t(self->__n); \ - if (!t) \ - goto bail; \ - if (PyDict_SetItemString(obj, __d, t) == -1) \ - goto bail; \ - Py_DECREF(t); \ - } while (0) - - if (self->added) { - Py_ssize_t len = PyList_GET_SIZE(self->added); - t = PyInt_FromSsize_t(len); - if (!t) - goto bail; - if (PyDict_SetItemString(obj, "index entries added", t) == -1) - goto bail; - Py_DECREF(t); - } - - if (self->raw_length != self->length - 1) - istat(raw_length, "revs on disk"); - istat(length, "revs in memory"); - istat(ntcapacity, "node trie capacity"); - istat(ntdepth, "node trie depth"); - istat(ntlength, "node trie count"); - istat(ntlookups, "node trie lookups"); - istat(ntmisses, "node trie misses"); - istat(ntrev, "node trie last rev scanned"); - istat(ntsplits, "node trie splits"); - -#undef istat - - return obj; - -bail: - Py_XDECREF(obj); - Py_XDECREF(t); - return NULL; -} - -/* - * When we cache a list, we want to be sure the caller can't mutate - * the cached copy. - */ -static PyObject *list_copy(PyObject *list) -{ - Py_ssize_t len = PyList_GET_SIZE(list); - PyObject *newlist = PyList_New(len); - Py_ssize_t i; - - if (newlist == NULL) - return NULL; - - for (i = 0; i < len; i++) { - PyObject *obj = PyList_GET_ITEM(list, i); - Py_INCREF(obj); - PyList_SET_ITEM(newlist, i, obj); - } - - return newlist; -} - -static int check_filter(PyObject *filter, Py_ssize_t arg) { - if (filter) { - PyObject *arglist, *result; - int isfiltered; - - arglist = Py_BuildValue("(n)", arg); - if (!arglist) { - return -1; - } - - result = PyEval_CallObject(filter, arglist); - Py_DECREF(arglist); - if (!result) { - return -1; - } - - /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error, - * same as this function, so we can just return it directly.*/ - isfiltered = PyObject_IsTrue(result); - Py_DECREF(result); - return isfiltered; - } else { - return 0; - } -} - -static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list, - Py_ssize_t marker, char *phases) -{ - PyObject *iter = NULL; - PyObject *iter_item = NULL; - Py_ssize_t min_idx = index_length(self) + 1; - long iter_item_long; - - if (PyList_GET_SIZE(list) != 0) { - iter = PyObject_GetIter(list); - if (iter == NULL) - return -2; - while ((iter_item = PyIter_Next(iter))) - { - iter_item_long = PyInt_AS_LONG(iter_item); - Py_DECREF(iter_item); - if (iter_item_long < min_idx) - min_idx = iter_item_long; - phases[iter_item_long] = marker; - } - Py_DECREF(iter); - } - - return min_idx; -} - -static inline void set_phase_from_parents(char *phases, int parent_1, - int parent_2, Py_ssize_t i) -{ - if (parent_1 >= 0 && phases[parent_1] > phases[i]) - phases[i] = phases[parent_1]; - if (parent_2 >= 0 && phases[parent_2] > phases[i]) - phases[i] = phases[parent_2]; -} - -static PyObject *reachableroots2(indexObject *self, PyObject *args) -{ - - /* Input */ - long minroot; - PyObject *includepatharg = NULL; - int includepath = 0; - /* heads and roots are lists */ - PyObject *heads = NULL; - PyObject *roots = NULL; - PyObject *reachable = NULL; - - PyObject *val; - Py_ssize_t len = index_length(self) - 1; - long revnum; - Py_ssize_t k; - Py_ssize_t i; - Py_ssize_t l; - int r; - int parents[2]; - - /* Internal data structure: - * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit - * revstates: array of length len+1 (all revs + nullrev) */ - int *tovisit = NULL; - long lentovisit = 0; - enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 }; - char *revstates = NULL; - - /* Get arguments */ - if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads, - &PyList_Type, &roots, - &PyBool_Type, &includepatharg)) - goto bail; - - if (includepatharg == Py_True) - includepath = 1; - - /* Initialize return set */ - reachable = PyList_New(0); - if (reachable == NULL) - goto bail; - - /* Initialize internal datastructures */ - tovisit = (int *)malloc((len + 1) * sizeof(int)); - if (tovisit == NULL) { - PyErr_NoMemory(); - goto bail; - } - - revstates = (char *)calloc(len + 1, 1); - if (revstates == NULL) { - PyErr_NoMemory(); - goto bail; - } - - l = PyList_GET_SIZE(roots); - for (i = 0; i < l; i++) { - revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i)); - if (revnum == -1 && PyErr_Occurred()) - goto bail; - /* If root is out of range, e.g. wdir(), it must be unreachable - * from heads. So we can just ignore it. */ - if (revnum + 1 < 0 || revnum + 1 >= len + 1) - continue; - revstates[revnum + 1] |= RS_ROOT; - } - - /* Populate tovisit with all the heads */ - l = PyList_GET_SIZE(heads); - for (i = 0; i < l; i++) { - revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i)); - if (revnum == -1 && PyErr_Occurred()) - goto bail; - if (revnum + 1 < 0 || revnum + 1 >= len + 1) { - PyErr_SetString(PyExc_IndexError, "head out of range"); - goto bail; - } - if (!(revstates[revnum + 1] & RS_SEEN)) { - tovisit[lentovisit++] = (int)revnum; - revstates[revnum + 1] |= RS_SEEN; - } - } - - /* Visit the tovisit list and find the reachable roots */ - k = 0; - while (k < lentovisit) { - /* Add the node to reachable if it is a root*/ - revnum = tovisit[k++]; - if (revstates[revnum + 1] & RS_ROOT) { - revstates[revnum + 1] |= RS_REACHABLE; - val = PyInt_FromLong(revnum); - if (val == NULL) - goto bail; - r = PyList_Append(reachable, val); - Py_DECREF(val); - if (r < 0) - goto bail; - if (includepath == 0) - continue; - } - - /* Add its parents to the list of nodes to visit */ - if (revnum == -1) - continue; - r = index_get_parents(self, revnum, parents, (int)len - 1); - if (r < 0) - goto bail; - for (i = 0; i < 2; i++) { - if (!(revstates[parents[i] + 1] & RS_SEEN) - && parents[i] >= minroot) { - tovisit[lentovisit++] = parents[i]; - revstates[parents[i] + 1] |= RS_SEEN; - } - } - } - - /* Find all the nodes in between the roots we found and the heads - * and add them to the reachable set */ - if (includepath == 1) { - long minidx = minroot; - if (minidx < 0) - minidx = 0; - for (i = minidx; i < len; i++) { - if (!(revstates[i + 1] & RS_SEEN)) - continue; - r = index_get_parents(self, i, parents, (int)len - 1); - /* Corrupted index file, error is set from - * index_get_parents */ - if (r < 0) - goto bail; - if (((revstates[parents[0] + 1] | - revstates[parents[1] + 1]) & RS_REACHABLE) - && !(revstates[i + 1] & RS_REACHABLE)) { - revstates[i + 1] |= RS_REACHABLE; - val = PyInt_FromLong(i); - if (val == NULL) - goto bail; - r = PyList_Append(reachable, val); - Py_DECREF(val); - if (r < 0) - goto bail; - } - } - } - - free(revstates); - free(tovisit); - return reachable; -bail: - Py_XDECREF(reachable); - free(revstates); - free(tovisit); - return NULL; -} - -static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args) -{ - PyObject *roots = Py_None; - PyObject *ret = NULL; - PyObject *phaseslist = NULL; - PyObject *phaseroots = NULL; - PyObject *phaseset = NULL; - PyObject *phasessetlist = NULL; - PyObject *rev = NULL; - Py_ssize_t len = index_length(self) - 1; - Py_ssize_t numphase = 0; - Py_ssize_t minrevallphases = 0; - Py_ssize_t minrevphase = 0; - Py_ssize_t i = 0; - char *phases = NULL; - long phase; - - if (!PyArg_ParseTuple(args, "O", &roots)) - goto done; - if (roots == NULL || !PyList_Check(roots)) - goto done; - - phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */ - if (phases == NULL) { - PyErr_NoMemory(); - goto done; - } - /* Put the phase information of all the roots in phases */ - numphase = PyList_GET_SIZE(roots)+1; - minrevallphases = len + 1; - phasessetlist = PyList_New(numphase); - if (phasessetlist == NULL) - goto done; - - PyList_SET_ITEM(phasessetlist, 0, Py_None); - Py_INCREF(Py_None); - - for (i = 0; i < numphase-1; i++) { - phaseroots = PyList_GET_ITEM(roots, i); - phaseset = PySet_New(NULL); - if (phaseset == NULL) - goto release; - PyList_SET_ITEM(phasessetlist, i+1, phaseset); - if (!PyList_Check(phaseroots)) - goto release; - minrevphase = add_roots_get_min(self, phaseroots, i+1, phases); - if (minrevphase == -2) /* Error from add_roots_get_min */ - goto release; - minrevallphases = MIN(minrevallphases, minrevphase); - } - /* Propagate the phase information from the roots to the revs */ - if (minrevallphases != -1) { - int parents[2]; - for (i = minrevallphases; i < len; i++) { - if (index_get_parents(self, i, parents, - (int)len - 1) < 0) - goto release; - set_phase_from_parents(phases, parents[0], parents[1], i); - } - } - /* Transform phase list to a python list */ - phaseslist = PyList_New(len); - if (phaseslist == NULL) - goto release; - for (i = 0; i < len; i++) { - PyObject *phaseval; - - phase = phases[i]; - /* We only store the sets of phase for non public phase, the public phase - * is computed as a difference */ - if (phase != 0) { - phaseset = PyList_GET_ITEM(phasessetlist, phase); - rev = PyInt_FromLong(i); - if (rev == NULL) - goto release; - PySet_Add(phaseset, rev); - Py_XDECREF(rev); - } - phaseval = PyInt_FromLong(phase); - if (phaseval == NULL) - goto release; - PyList_SET_ITEM(phaseslist, i, phaseval); - } - ret = PyTuple_Pack(2, phaseslist, phasessetlist); - -release: - Py_XDECREF(phaseslist); - Py_XDECREF(phasessetlist); -done: - free(phases); - return ret; -} - -static PyObject *index_headrevs(indexObject *self, PyObject *args) -{ - Py_ssize_t i, j, len; - char *nothead = NULL; - PyObject *heads = NULL; - PyObject *filter = NULL; - PyObject *filteredrevs = Py_None; - - if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) { - return NULL; - } - - if (self->headrevs && filteredrevs == self->filteredrevs) - return list_copy(self->headrevs); - - Py_DECREF(self->filteredrevs); - self->filteredrevs = filteredrevs; - Py_INCREF(filteredrevs); - - if (filteredrevs != Py_None) { - filter = PyObject_GetAttrString(filteredrevs, "__contains__"); - if (!filter) { - PyErr_SetString(PyExc_TypeError, - "filteredrevs has no attribute __contains__"); - goto bail; - } - } - - len = index_length(self) - 1; - heads = PyList_New(0); - if (heads == NULL) - goto bail; - if (len == 0) { - PyObject *nullid = PyInt_FromLong(-1); - if (nullid == NULL || PyList_Append(heads, nullid) == -1) { - Py_XDECREF(nullid); - goto bail; - } - goto done; - } - - nothead = calloc(len, 1); - if (nothead == NULL) { - PyErr_NoMemory(); - goto bail; - } - - for (i = len - 1; i >= 0; i--) { - int isfiltered; - int parents[2]; - - /* If nothead[i] == 1, it means we've seen an unfiltered child of this - * node already, and therefore this node is not filtered. So we can skip - * the expensive check_filter step. - */ - if (nothead[i] != 1) { - isfiltered = check_filter(filter, i); - if (isfiltered == -1) { - PyErr_SetString(PyExc_TypeError, - "unable to check filter"); - goto bail; - } - - if (isfiltered) { - nothead[i] = 1; - continue; - } - } - - if (index_get_parents(self, i, parents, (int)len - 1) < 0) - goto bail; - for (j = 0; j < 2; j++) { - if (parents[j] >= 0) - nothead[parents[j]] = 1; - } - } - - for (i = 0; i < len; i++) { - PyObject *head; - - if (nothead[i]) - continue; - head = PyInt_FromSsize_t(i); - if (head == NULL || PyList_Append(heads, head) == -1) { - Py_XDECREF(head); - goto bail; - } - } - -done: - self->headrevs = heads; - Py_XDECREF(filter); - free(nothead); - return list_copy(self->headrevs); -bail: - Py_XDECREF(filter); - Py_XDECREF(heads); - free(nothead); - return NULL; -} - -static inline int nt_level(const char *node, Py_ssize_t level) -{ - int v = node[level>>1]; - if (!(level & 1)) - v >>= 4; - return v & 0xf; -} - -/* - * Return values: - * - * -4: match is ambiguous (multiple candidates) - * -2: not found - * rest: valid rev - */ -static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen, - int hex) -{ - int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level; - int level, maxlevel, off; - - if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0) - return -1; - - if (self->nt == NULL) - return -2; - - if (hex) - maxlevel = nodelen > 40 ? 40 : (int)nodelen; - else - maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2); - - for (level = off = 0; level < maxlevel; level++) { - int k = getnybble(node, level); - nodetree *n = &self->nt[off]; - int v = n->children[k]; - - if (v < 0) { - const char *n; - Py_ssize_t i; - - v = -(v + 1); - n = index_node(self, v); - if (n == NULL) - return -2; - for (i = level; i < maxlevel; i++) - if (getnybble(node, i) != nt_level(n, i)) - return -2; - return v; - } - if (v == 0) - return -2; - off = v; - } - /* multiple matches against an ambiguous prefix */ - return -4; -} - -static int nt_new(indexObject *self) -{ - if (self->ntlength == self->ntcapacity) { - if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) { - PyErr_SetString(PyExc_MemoryError, - "overflow in nt_new"); - return -1; - } - self->ntcapacity *= 2; - self->nt = realloc(self->nt, - self->ntcapacity * sizeof(nodetree)); - if (self->nt == NULL) { - PyErr_SetString(PyExc_MemoryError, "out of memory"); - return -1; - } - memset(&self->nt[self->ntlength], 0, - sizeof(nodetree) * (self->ntcapacity - self->ntlength)); - } - return self->ntlength++; -} - -static int nt_insert(indexObject *self, const char *node, int rev) -{ - int level = 0; - int off = 0; - - while (level < 40) { - int k = nt_level(node, level); - nodetree *n; - int v; - - n = &self->nt[off]; - v = n->children[k]; - - if (v == 0) { - n->children[k] = -rev - 1; - return 0; - } - if (v < 0) { - const char *oldnode = index_node(self, -(v + 1)); - int noff; - - if (!oldnode || !memcmp(oldnode, node, 20)) { - n->children[k] = -rev - 1; - return 0; - } - noff = nt_new(self); - if (noff == -1) - return -1; - /* self->nt may have been changed by realloc */ - self->nt[off].children[k] = noff; - off = noff; - n = &self->nt[off]; - n->children[nt_level(oldnode, ++level)] = v; - if (level > self->ntdepth) - self->ntdepth = level; - self->ntsplits += 1; - } else { - level += 1; - off = v; - } - } - - return -1; -} - -static int nt_init(indexObject *self) -{ - if (self->nt == NULL) { - if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) { - PyErr_SetString(PyExc_ValueError, "overflow in nt_init"); - return -1; - } - self->ntcapacity = self->raw_length < 4 - ? 4 : (int)self->raw_length / 2; - - self->nt = calloc(self->ntcapacity, sizeof(nodetree)); - if (self->nt == NULL) { - PyErr_NoMemory(); - return -1; - } - self->ntlength = 1; - self->ntrev = (int)index_length(self) - 1; - self->ntlookups = 1; - self->ntmisses = 0; - if (nt_insert(self, nullid, INT_MAX) == -1) - return -1; - } - return 0; -} - -/* - * Return values: - * - * -3: error (exception set) - * -2: not found (no exception set) - * rest: valid rev - */ -static int index_find_node(indexObject *self, - const char *node, Py_ssize_t nodelen) -{ - int rev; - - self->ntlookups++; - rev = nt_find(self, node, nodelen, 0); - if (rev >= -1) - return rev; - - if (nt_init(self) == -1) - return -3; - - /* - * For the first handful of lookups, we scan the entire index, - * and cache only the matching nodes. This optimizes for cases - * like "hg tip", where only a few nodes are accessed. - * - * After that, we cache every node we visit, using a single - * scan amortized over multiple lookups. This gives the best - * bulk performance, e.g. for "hg log". - */ - if (self->ntmisses++ < 4) { - for (rev = self->ntrev - 1; rev >= 0; rev--) { - const char *n = index_node(self, rev); - if (n == NULL) - return -2; - if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { - if (nt_insert(self, n, rev) == -1) - return -3; - break; - } - } - } else { - for (rev = self->ntrev - 1; rev >= 0; rev--) { - const char *n = index_node(self, rev); - if (n == NULL) { - self->ntrev = rev + 1; - return -2; - } - if (nt_insert(self, n, rev) == -1) { - self->ntrev = rev + 1; - return -3; - } - if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { - break; - } - } - self->ntrev = rev; - } - - if (rev >= 0) - return rev; - return -2; -} - -static void raise_revlog_error(void) -{ - PyObject *mod = NULL, *dict = NULL, *errclass = NULL; - - mod = PyImport_ImportModule("mercurial.error"); - if (mod == NULL) { - goto cleanup; - } - - dict = PyModule_GetDict(mod); - if (dict == NULL) { - goto cleanup; - } - Py_INCREF(dict); - - errclass = PyDict_GetItemString(dict, "RevlogError"); - if (errclass == NULL) { - PyErr_SetString(PyExc_SystemError, - "could not find RevlogError"); - goto cleanup; - } - - /* value of exception is ignored by callers */ - PyErr_SetString(errclass, "RevlogError"); - -cleanup: - Py_XDECREF(dict); - Py_XDECREF(mod); -} - -static PyObject *index_getitem(indexObject *self, PyObject *value) -{ - char *node; - Py_ssize_t nodelen; - int rev; - - if (PyInt_Check(value)) - return index_get(self, PyInt_AS_LONG(value)); - - if (node_check(value, &node, &nodelen) == -1) - return NULL; - rev = index_find_node(self, node, nodelen); - if (rev >= -1) - return PyInt_FromLong(rev); - if (rev == -2) - raise_revlog_error(); - return NULL; -} - -static int nt_partialmatch(indexObject *self, const char *node, - Py_ssize_t nodelen) -{ - int rev; - - if (nt_init(self) == -1) - return -3; - - if (self->ntrev > 0) { - /* ensure that the radix tree is fully populated */ - for (rev = self->ntrev - 1; rev >= 0; rev--) { - const char *n = index_node(self, rev); - if (n == NULL) - return -2; - if (nt_insert(self, n, rev) == -1) - return -3; - } - self->ntrev = rev; - } - - return nt_find(self, node, nodelen, 1); -} - -static PyObject *index_partialmatch(indexObject *self, PyObject *args) -{ - const char *fullnode; - int nodelen; - char *node; - int rev, i; - - if (!PyArg_ParseTuple(args, "s#", &node, &nodelen)) - return NULL; - - if (nodelen < 4) { - PyErr_SetString(PyExc_ValueError, "key too short"); - return NULL; - } - - if (nodelen > 40) { - PyErr_SetString(PyExc_ValueError, "key too long"); - return NULL; - } - - for (i = 0; i < nodelen; i++) - hexdigit(node, i); - if (PyErr_Occurred()) { - /* input contains non-hex characters */ - PyErr_Clear(); - Py_RETURN_NONE; - } - - rev = nt_partialmatch(self, node, nodelen); - - switch (rev) { - case -4: - raise_revlog_error(); - case -3: - return NULL; - case -2: - Py_RETURN_NONE; - case -1: - return PyBytes_FromStringAndSize(nullid, 20); - } - - fullnode = index_node(self, rev); - if (fullnode == NULL) { - PyErr_Format(PyExc_IndexError, - "could not access rev %d", rev); - return NULL; - } - return PyBytes_FromStringAndSize(fullnode, 20); -} - -static PyObject *index_m_get(indexObject *self, PyObject *args) -{ - Py_ssize_t nodelen; - PyObject *val; - char *node; - int rev; - - if (!PyArg_ParseTuple(args, "O", &val)) - return NULL; - if (node_check(val, &node, &nodelen) == -1) - return NULL; - rev = index_find_node(self, node, nodelen); - if (rev == -3) - return NULL; - if (rev == -2) - Py_RETURN_NONE; - return PyInt_FromLong(rev); -} - -static int index_contains(indexObject *self, PyObject *value) -{ - char *node; - Py_ssize_t nodelen; - - if (PyInt_Check(value)) { - long rev = PyInt_AS_LONG(value); - return rev >= -1 && rev < index_length(self); - } - - if (node_check(value, &node, &nodelen) == -1) - return -1; - - switch (index_find_node(self, node, nodelen)) { - case -3: - return -1; - case -2: - return 0; - default: - return 1; - } -} - -typedef uint64_t bitmask; - -/* - * Given a disjoint set of revs, return all candidates for the - * greatest common ancestor. In revset notation, this is the set - * "heads(::a and ::b and ...)" - */ -static PyObject *find_gca_candidates(indexObject *self, const int *revs, - int revcount) -{ - const bitmask allseen = (1ull << revcount) - 1; - const bitmask poison = 1ull << revcount; - PyObject *gca = PyList_New(0); - int i, v, interesting; - int maxrev = -1; - bitmask sp; - bitmask *seen; - - if (gca == NULL) - return PyErr_NoMemory(); - - for (i = 0; i < revcount; i++) { - if (revs[i] > maxrev) - maxrev = revs[i]; - } - - seen = calloc(sizeof(*seen), maxrev + 1); - if (seen == NULL) { - Py_DECREF(gca); - return PyErr_NoMemory(); - } - - for (i = 0; i < revcount; i++) - seen[revs[i]] = 1ull << i; - - interesting = revcount; - - for (v = maxrev; v >= 0 && interesting; v--) { - bitmask sv = seen[v]; - int parents[2]; - - if (!sv) - continue; - - if (sv < poison) { - interesting -= 1; - if (sv == allseen) { - PyObject *obj = PyInt_FromLong(v); - if (obj == NULL) - goto bail; - if (PyList_Append(gca, obj) == -1) { - Py_DECREF(obj); - goto bail; - } - sv |= poison; - for (i = 0; i < revcount; i++) { - if (revs[i] == v) - goto done; - } - } - } - if (index_get_parents(self, v, parents, maxrev) < 0) - goto bail; - - for (i = 0; i < 2; i++) { - int p = parents[i]; - if (p == -1) - continue; - sp = seen[p]; - if (sv < poison) { - if (sp == 0) { - seen[p] = sv; - interesting++; - } - else if (sp != sv) - seen[p] |= sv; - } else { - if (sp && sp < poison) - interesting--; - seen[p] = sv; - } - } - } - -done: - free(seen); - return gca; -bail: - free(seen); - Py_XDECREF(gca); - return NULL; -} - -/* - * Given a disjoint set of revs, return the subset with the longest - * path to the root. - */ -static PyObject *find_deepest(indexObject *self, PyObject *revs) -{ - const Py_ssize_t revcount = PyList_GET_SIZE(revs); - static const Py_ssize_t capacity = 24; - int *depth, *interesting = NULL; - int i, j, v, ninteresting; - PyObject *dict = NULL, *keys = NULL; - long *seen = NULL; - int maxrev = -1; - long final; - - if (revcount > capacity) { - PyErr_Format(PyExc_OverflowError, - "bitset size (%ld) > capacity (%ld)", - (long)revcount, (long)capacity); - return NULL; - } - - for (i = 0; i < revcount; i++) { - int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); - if (n > maxrev) - maxrev = n; - } - - depth = calloc(sizeof(*depth), maxrev + 1); - if (depth == NULL) - return PyErr_NoMemory(); - - seen = calloc(sizeof(*seen), maxrev + 1); - if (seen == NULL) { - PyErr_NoMemory(); - goto bail; - } - - interesting = calloc(sizeof(*interesting), 2 << revcount); - if (interesting == NULL) { - PyErr_NoMemory(); - goto bail; - } - - if (PyList_Sort(revs) == -1) - goto bail; - - for (i = 0; i < revcount; i++) { - int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); - long b = 1l << i; - depth[n] = 1; - seen[n] = b; - interesting[b] = 1; - } - - ninteresting = (int)revcount; - - for (v = maxrev; v >= 0 && ninteresting > 1; v--) { - int dv = depth[v]; - int parents[2]; - long sv; - - if (dv == 0) - continue; - - sv = seen[v]; - if (index_get_parents(self, v, parents, maxrev) < 0) - goto bail; - - for (i = 0; i < 2; i++) { - int p = parents[i]; - long sp; - int dp; - - if (p == -1) - continue; - - dp = depth[p]; - sp = seen[p]; - if (dp <= dv) { - depth[p] = dv + 1; - if (sp != sv) { - interesting[sv] += 1; - seen[p] = sv; - if (sp) { - interesting[sp] -= 1; - if (interesting[sp] == 0) - ninteresting -= 1; - } - } - } - else if (dv == dp - 1) { - long nsp = sp | sv; - if (nsp == sp) - continue; - seen[p] = nsp; - interesting[sp] -= 1; - if (interesting[sp] == 0 && interesting[nsp] > 0) - ninteresting -= 1; - interesting[nsp] += 1; - } - } - interesting[sv] -= 1; - if (interesting[sv] == 0) - ninteresting -= 1; - } - - final = 0; - j = ninteresting; - for (i = 0; i < (int)(2 << revcount) && j > 0; i++) { - if (interesting[i] == 0) - continue; - final |= i; - j -= 1; - } - if (final == 0) { - keys = PyList_New(0); - goto bail; - } - - dict = PyDict_New(); - if (dict == NULL) - goto bail; - - for (i = 0; i < revcount; i++) { - PyObject *key; - - if ((final & (1 << i)) == 0) - continue; - - key = PyList_GET_ITEM(revs, i); - Py_INCREF(key); - Py_INCREF(Py_None); - if (PyDict_SetItem(dict, key, Py_None) == -1) { - Py_DECREF(key); - Py_DECREF(Py_None); - goto bail; - } - } - - keys = PyDict_Keys(dict); - -bail: - free(depth); - free(seen); - free(interesting); - Py_XDECREF(dict); - - return keys; -} - -/* - * Given a (possibly overlapping) set of revs, return all the - * common ancestors heads: heads(::args[0] and ::a[1] and ...) - */ -static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args) -{ - PyObject *ret = NULL; - Py_ssize_t argcount, i, len; - bitmask repeat = 0; - int revcount = 0; - int *revs; - - argcount = PySequence_Length(args); - revs = PyMem_Malloc(argcount * sizeof(*revs)); - if (argcount > 0 && revs == NULL) - return PyErr_NoMemory(); - len = index_length(self) - 1; - - for (i = 0; i < argcount; i++) { - static const int capacity = 24; - PyObject *obj = PySequence_GetItem(args, i); - bitmask x; - long val; - - if (!PyInt_Check(obj)) { - PyErr_SetString(PyExc_TypeError, - "arguments must all be ints"); - Py_DECREF(obj); - goto bail; - } - val = PyInt_AsLong(obj); - Py_DECREF(obj); - if (val == -1) { - ret = PyList_New(0); - goto done; - } - if (val < 0 || val >= len) { - PyErr_SetString(PyExc_IndexError, - "index out of range"); - goto bail; - } - /* this cheesy bloom filter lets us avoid some more - * expensive duplicate checks in the common set-is-disjoint - * case */ - x = 1ull << (val & 0x3f); - if (repeat & x) { - int k; - for (k = 0; k < revcount; k++) { - if (val == revs[k]) - goto duplicate; - } - } - else repeat |= x; - if (revcount >= capacity) { - PyErr_Format(PyExc_OverflowError, - "bitset size (%d) > capacity (%d)", - revcount, capacity); - goto bail; - } - revs[revcount++] = (int)val; - duplicate:; - } - - if (revcount == 0) { - ret = PyList_New(0); - goto done; - } - if (revcount == 1) { - PyObject *obj; - ret = PyList_New(1); - if (ret == NULL) - goto bail; - obj = PyInt_FromLong(revs[0]); - if (obj == NULL) - goto bail; - PyList_SET_ITEM(ret, 0, obj); - goto done; - } - - ret = find_gca_candidates(self, revs, revcount); - if (ret == NULL) - goto bail; - -done: - PyMem_Free(revs); - return ret; - -bail: - PyMem_Free(revs); - Py_XDECREF(ret); - return NULL; -} - -/* - * Given a (possibly overlapping) set of revs, return the greatest - * common ancestors: those with the longest path to the root. - */ -static PyObject *index_ancestors(indexObject *self, PyObject *args) -{ - PyObject *ret; - PyObject *gca = index_commonancestorsheads(self, args); - if (gca == NULL) - return NULL; - - if (PyList_GET_SIZE(gca) <= 1) { - return gca; - } - - ret = find_deepest(self, gca); - Py_DECREF(gca); - return ret; -} - -/* - * Invalidate any trie entries introduced by added revs. - */ -static void nt_invalidate_added(indexObject *self, Py_ssize_t start) -{ - Py_ssize_t i, len = PyList_GET_SIZE(self->added); - - for (i = start; i < len; i++) { - PyObject *tuple = PyList_GET_ITEM(self->added, i); - PyObject *node = PyTuple_GET_ITEM(tuple, 7); - - nt_insert(self, PyBytes_AS_STRING(node), -1); - } - - if (start == 0) - Py_CLEAR(self->added); -} - -/* - * Delete a numeric range of revs, which must be at the end of the - * range, but exclude the sentinel nullid entry. - */ -static int index_slice_del(indexObject *self, PyObject *item) -{ - Py_ssize_t start, stop, step, slicelength; - Py_ssize_t length = index_length(self); - int ret = 0; - -/* Argument changed from PySliceObject* to PyObject* in Python 3. */ -#ifdef IS_PY3K - if (PySlice_GetIndicesEx(item, length, -#else - if (PySlice_GetIndicesEx((PySliceObject*)item, length, -#endif - &start, &stop, &step, &slicelength) < 0) - return -1; - - if (slicelength <= 0) - return 0; - - if ((step < 0 && start < stop) || (step > 0 && start > stop)) - stop = start; - - if (step < 0) { - stop = start + 1; - start = stop + step*(slicelength - 1) - 1; - step = -step; - } - - if (step != 1) { - PyErr_SetString(PyExc_ValueError, - "revlog index delete requires step size of 1"); - return -1; - } - - if (stop != length - 1) { - PyErr_SetString(PyExc_IndexError, - "revlog index deletion indices are invalid"); - return -1; - } - - if (start < self->length - 1) { - if (self->nt) { - Py_ssize_t i; - - for (i = start + 1; i < self->length - 1; i++) { - const char *node = index_node(self, i); - - if (node) - nt_insert(self, node, -1); - } - if (self->added) - nt_invalidate_added(self, 0); - if (self->ntrev > start) - self->ntrev = (int)start; - } - self->length = start + 1; - if (start < self->raw_length) { - if (self->cache) { - Py_ssize_t i; - for (i = start; i < self->raw_length; i++) - Py_CLEAR(self->cache[i]); - } - self->raw_length = start; - } - goto done; - } - - if (self->nt) { - nt_invalidate_added(self, start - self->length + 1); - if (self->ntrev > start) - self->ntrev = (int)start; - } - if (self->added) - ret = PyList_SetSlice(self->added, start - self->length + 1, - PyList_GET_SIZE(self->added), NULL); -done: - Py_CLEAR(self->headrevs); - return ret; -} - -/* - * Supported ops: - * - * slice deletion - * string assignment (extend node->rev mapping) - * string deletion (shrink node->rev mapping) - */ -static int index_assign_subscript(indexObject *self, PyObject *item, - PyObject *value) -{ - char *node; - Py_ssize_t nodelen; - long rev; - - if (PySlice_Check(item) && value == NULL) - return index_slice_del(self, item); - - if (node_check(item, &node, &nodelen) == -1) - return -1; - - if (value == NULL) - return self->nt ? nt_insert(self, node, -1) : 0; - rev = PyInt_AsLong(value); - if (rev > INT_MAX || rev < 0) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_ValueError, "rev out of range"); - return -1; - } - - if (nt_init(self) == -1) - return -1; - return nt_insert(self, node, (int)rev); -} - -/* - * Find all RevlogNG entries in an index that has inline data. Update - * the optional "offsets" table with those entries. - */ -static Py_ssize_t inline_scan(indexObject *self, const char **offsets) -{ - const char *data = (const char *)self->buf.buf; - Py_ssize_t pos = 0; - Py_ssize_t end = self->buf.len; - long incr = v1_hdrsize; - Py_ssize_t len = 0; - - while (pos + v1_hdrsize <= end && pos >= 0) { - uint32_t comp_len; - /* 3rd element of header is length of compressed inline data */ - comp_len = getbe32(data + pos + 8); - incr = v1_hdrsize + comp_len; - if (offsets) - offsets[len] = data + pos; - len++; - pos += incr; - } - - if (pos != end) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_ValueError, "corrupt index file"); - return -1; - } - - return len; -} - -static int index_init(indexObject *self, PyObject *args) -{ - PyObject *data_obj, *inlined_obj; - Py_ssize_t size; - - /* Initialize before argument-checking to avoid index_dealloc() crash. */ - self->raw_length = 0; - self->added = NULL; - self->cache = NULL; - self->data = NULL; - memset(&self->buf, 0, sizeof(self->buf)); - self->headrevs = NULL; - self->filteredrevs = Py_None; - Py_INCREF(Py_None); - self->nt = NULL; - self->offsets = NULL; - - if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) - return -1; - if (!PyObject_CheckBuffer(data_obj)) { - PyErr_SetString(PyExc_TypeError, - "data does not support buffer interface"); - return -1; - } - - if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1) - return -1; - size = self->buf.len; - - self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj); - self->data = data_obj; - - self->ntlength = self->ntcapacity = 0; - self->ntdepth = self->ntsplits = 0; - self->ntlookups = self->ntmisses = 0; - self->ntrev = -1; - Py_INCREF(self->data); - - if (self->inlined) { - Py_ssize_t len = inline_scan(self, NULL); - if (len == -1) - goto bail; - self->raw_length = len; - self->length = len + 1; - } else { - if (size % v1_hdrsize) { - PyErr_SetString(PyExc_ValueError, "corrupt index file"); - goto bail; - } - self->raw_length = size / v1_hdrsize; - self->length = self->raw_length + 1; - } - - return 0; -bail: - return -1; -} - -static PyObject *index_nodemap(indexObject *self) -{ - Py_INCREF(self); - return (PyObject *)self; -} - -static void index_dealloc(indexObject *self) -{ - _index_clearcaches(self); - Py_XDECREF(self->filteredrevs); - if (self->buf.buf) { - PyBuffer_Release(&self->buf); - memset(&self->buf, 0, sizeof(self->buf)); - } - Py_XDECREF(self->data); - Py_XDECREF(self->added); - PyObject_Del(self); -} - -static PySequenceMethods index_sequence_methods = { - (lenfunc)index_length, /* sq_length */ - 0, /* sq_concat */ - 0, /* sq_repeat */ - (ssizeargfunc)index_get, /* sq_item */ - 0, /* sq_slice */ - 0, /* sq_ass_item */ - 0, /* sq_ass_slice */ - (objobjproc)index_contains, /* sq_contains */ -}; - -static PyMappingMethods index_mapping_methods = { - (lenfunc)index_length, /* mp_length */ - (binaryfunc)index_getitem, /* mp_subscript */ - (objobjargproc)index_assign_subscript, /* mp_ass_subscript */ -}; - -static PyMethodDef index_methods[] = { - {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS, - "return the gca set of the given revs"}, - {"commonancestorsheads", (PyCFunction)index_commonancestorsheads, - METH_VARARGS, - "return the heads of the common ancestors of the given revs"}, - {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, - "clear the index caches"}, - {"get", (PyCFunction)index_m_get, METH_VARARGS, - "get an index entry"}, - {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, - METH_VARARGS, "compute phases"}, - {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS, - "reachableroots"}, - {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS, - "get head revisions"}, /* Can do filtering since 3.2 */ - {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS, - "get filtered head revisions"}, /* Can always do filtering */ - {"insert", (PyCFunction)index_insert, METH_VARARGS, - "insert an index entry"}, - {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, - "match a potentially ambiguous node ID"}, - {"stats", (PyCFunction)index_stats, METH_NOARGS, - "stats for the index"}, - {NULL} /* Sentinel */ -}; - -static PyGetSetDef index_getset[] = { - {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject indexType = { - PyVarObject_HEAD_INIT(NULL, 0) - "parsers.index", /* tp_name */ - sizeof(indexObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)index_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - &index_sequence_methods, /* tp_as_sequence */ - &index_mapping_methods, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - "revlog index", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - index_methods, /* tp_methods */ - 0, /* tp_members */ - index_getset, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)index_init, /* tp_init */ - 0, /* tp_alloc */ -}; - -/* - * returns a tuple of the form (index, index, cache) with elements as - * follows: - * - * index: an index object that lazily parses RevlogNG records - * cache: if data is inlined, a tuple (0, index_file_content), else None - * index_file_content could be a string, or a buffer - * - * added complications are for backwards compatibility - */ -static PyObject *parse_index2(PyObject *self, PyObject *args) -{ - PyObject *tuple = NULL, *cache = NULL; - indexObject *idx; - int ret; - - idx = PyObject_New(indexObject, &indexType); - if (idx == NULL) - goto bail; - - ret = index_init(idx, args); - if (ret == -1) - goto bail; - - if (idx->inlined) { - cache = Py_BuildValue("iO", 0, idx->data); - if (cache == NULL) - goto bail; - } else { - cache = Py_None; - Py_INCREF(cache); - } - - tuple = Py_BuildValue("NN", idx, cache); - if (!tuple) - goto bail; - return tuple; - -bail: - Py_XDECREF(idx); - Py_XDECREF(cache); - Py_XDECREF(tuple); - return NULL; -} - -#define BUMPED_FIX 1 -#define USING_SHA_256 2 -#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1) - -static PyObject *readshas( - const char *source, unsigned char num, Py_ssize_t hashwidth) -{ - int i; - PyObject *list = PyTuple_New(num); - if (list == NULL) { - return NULL; - } - for (i = 0; i < num; i++) { - PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth); - if (hash == NULL) { - Py_DECREF(list); - return NULL; - } - PyTuple_SET_ITEM(list, i, hash); - source += hashwidth; - } - return list; -} - -static PyObject *fm1readmarker(const char *databegin, const char *dataend, - uint32_t *msize) -{ - const char *data = databegin; - const char *meta; - - double mtime; - int16_t tz; - uint16_t flags; - unsigned char nsuccs, nparents, nmetadata; - Py_ssize_t hashwidth = 20; - - PyObject *prec = NULL, *parents = NULL, *succs = NULL; - PyObject *metadata = NULL, *ret = NULL; - int i; - - if (data + FM1_HEADER_SIZE > dataend) { - goto overflow; - } - - *msize = getbe32(data); - data += 4; - mtime = getbefloat64(data); - data += 8; - tz = getbeint16(data); - data += 2; - flags = getbeuint16(data); - data += 2; - - if (flags & USING_SHA_256) { - hashwidth = 32; - } - - nsuccs = (unsigned char)(*data++); - nparents = (unsigned char)(*data++); - nmetadata = (unsigned char)(*data++); - - if (databegin + *msize > dataend) { - goto overflow; - } - dataend = databegin + *msize; /* narrow down to marker size */ - - if (data + hashwidth > dataend) { - goto overflow; - } - prec = PyBytes_FromStringAndSize(data, hashwidth); - data += hashwidth; - if (prec == NULL) { - goto bail; - } - - if (data + nsuccs * hashwidth > dataend) { - goto overflow; - } - succs = readshas(data, nsuccs, hashwidth); - if (succs == NULL) { - goto bail; - } - data += nsuccs * hashwidth; - - if (nparents == 1 || nparents == 2) { - if (data + nparents * hashwidth > dataend) { - goto overflow; - } - parents = readshas(data, nparents, hashwidth); - if (parents == NULL) { - goto bail; - } - data += nparents * hashwidth; - } else { - parents = Py_None; - Py_INCREF(parents); - } - - if (data + 2 * nmetadata > dataend) { - goto overflow; - } - meta = data + (2 * nmetadata); - metadata = PyTuple_New(nmetadata); - if (metadata == NULL) { - goto bail; - } - for (i = 0; i < nmetadata; i++) { - PyObject *tmp, *left = NULL, *right = NULL; - Py_ssize_t leftsize = (unsigned char)(*data++); - Py_ssize_t rightsize = (unsigned char)(*data++); - if (meta + leftsize + rightsize > dataend) { - goto overflow; - } - left = PyBytes_FromStringAndSize(meta, leftsize); - meta += leftsize; - right = PyBytes_FromStringAndSize(meta, rightsize); - meta += rightsize; - tmp = PyTuple_New(2); - if (!left || !right || !tmp) { - Py_XDECREF(left); - Py_XDECREF(right); - Py_XDECREF(tmp); - goto bail; - } - PyTuple_SET_ITEM(tmp, 0, left); - PyTuple_SET_ITEM(tmp, 1, right); - PyTuple_SET_ITEM(metadata, i, tmp); - } - ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, - metadata, mtime, (int)tz * 60, parents); - goto bail; /* return successfully */ - -overflow: - PyErr_SetString(PyExc_ValueError, "overflow in obsstore"); -bail: - Py_XDECREF(prec); - Py_XDECREF(succs); - Py_XDECREF(metadata); - Py_XDECREF(parents); - return ret; -} - - -static PyObject *fm1readmarkers(PyObject *self, PyObject *args) { - const char *data, *dataend; - int datalen; - Py_ssize_t offset, stop; - PyObject *markers = NULL; - - if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) { - return NULL; - } - dataend = data + datalen; - data += offset; - markers = PyList_New(0); - if (!markers) { - return NULL; - } - while (offset < stop) { - uint32_t msize; - int error; - PyObject *record = fm1readmarker(data, dataend, &msize); - if (!record) { - goto bail; - } - error = PyList_Append(markers, record); - Py_DECREF(record); - if (error) { - goto bail; - } - data += msize; - offset += msize; - } - return markers; -bail: - Py_DECREF(markers); - return NULL; -} - -static char parsers_doc[] = "Efficient content parsing."; - -PyObject *encodedir(PyObject *self, PyObject *args); -PyObject *pathencode(PyObject *self, PyObject *args); -PyObject *lowerencode(PyObject *self, PyObject *args); - -static PyMethodDef methods[] = { - {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"}, - {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS, - "create a set containing non-normal and other parent entries of given " - "dirstate\n"}, - {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, - {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, - {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"}, - {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"}, - {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"}, - {"dict_new_presized", dict_new_presized, METH_VARARGS, - "construct a dict with an expected size\n"}, - {"make_file_foldmap", make_file_foldmap, METH_VARARGS, - "make file foldmap\n"}, - {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"}, - {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"}, - {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"}, - {"fm1readmarkers", fm1readmarkers, METH_VARARGS, - "parse v1 obsolete markers\n"}, - {NULL, NULL} -}; - -void dirs_module_init(PyObject *mod); -void manifest_module_init(PyObject *mod); - -static const int version = 1; - -static void module_init(PyObject *mod) -{ - PyModule_AddIntConstant(mod, "version", version); - - /* This module constant has two purposes. First, it lets us unit test - * the ImportError raised without hard-coding any error text. This - * means we can change the text in the future without breaking tests, - * even across changesets without a recompile. Second, its presence - * can be used to determine whether the version-checking logic is - * present, which also helps in testing across changesets without a - * recompile. Note that this means the pure-Python version of parsers - * should not have this module constant. */ - PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext); - - dirs_module_init(mod); - manifest_module_init(mod); - - indexType.tp_new = PyType_GenericNew; - if (PyType_Ready(&indexType) < 0 || - PyType_Ready(&dirstateTupleType) < 0) - return; - Py_INCREF(&indexType); - PyModule_AddObject(mod, "index", (PyObject *)&indexType); - Py_INCREF(&dirstateTupleType); - PyModule_AddObject(mod, "dirstatetuple", - (PyObject *)&dirstateTupleType); - - nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0, - -1, -1, -1, -1, nullid, 20); - if (nullentry) - PyObject_GC_UnTrack(nullentry); -} - -static int check_python_version(void) -{ - PyObject *sys = PyImport_ImportModule("sys"), *ver; - long hexversion; - if (!sys) - return -1; - ver = PyObject_GetAttrString(sys, "hexversion"); - Py_DECREF(sys); - if (!ver) - return -1; - hexversion = PyInt_AsLong(ver); - Py_DECREF(ver); - /* sys.hexversion is a 32-bit number by default, so the -1 case - * should only occur in unusual circumstances (e.g. if sys.hexversion - * is manually set to an invalid value). */ - if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) { - PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension " - "modules were compiled with Python " PY_VERSION ", but " - "Mercurial is currently using Python with sys.hexversion=%ld: " - "Python %s\n at: %s", versionerrortext, hexversion, - Py_GetVersion(), Py_GetProgramFullPath()); - return -1; - } - return 0; -} - -#ifdef IS_PY3K -static struct PyModuleDef parsers_module = { - PyModuleDef_HEAD_INIT, - "parsers", - parsers_doc, - -1, - methods -}; - -PyMODINIT_FUNC PyInit_parsers(void) -{ - PyObject *mod; - - if (check_python_version() == -1) - return NULL; - mod = PyModule_Create(&parsers_module); - module_init(mod); - return mod; -} -#else -PyMODINIT_FUNC initparsers(void) -{ - PyObject *mod; - - if (check_python_version() == -1) - return; - mod = Py_InitModule3("parsers", methods, parsers_doc); - module_init(mod); -} -#endif
--- a/mercurial/pathencode.c Sat Aug 13 12:18:58 2016 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,765 +0,0 @@ -/* - pathencode.c - efficient path name encoding - - Copyright 2012 Facebook - - This software may be used and distributed according to the terms of - the GNU General Public License, incorporated herein by reference. -*/ - -/* - * An implementation of the name encoding scheme used by the fncache - * store. The common case is of a path < 120 bytes long, which is - * handled either in a single pass with no allocations or two passes - * with a single allocation. For longer paths, multiple passes are - * required. - */ - -#define PY_SSIZE_T_CLEAN -#include <Python.h> -#include <assert.h> -#include <ctype.h> -#include <stdlib.h> -#include <string.h> - -#include "util.h" - -/* state machine for the fast path */ -enum path_state { - START, /* first byte of a path component */ - A, /* "AUX" */ - AU, - THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */ - C, /* "CON" or "COMn" */ - CO, - COMLPT, /* "COM" or "LPT" */ - COMLPTn, - L, - LP, - N, - NU, - P, /* "PRN" */ - PR, - LDOT, /* leading '.' */ - DOT, /* '.' in a non-leading position */ - H, /* ".h" */ - HGDI, /* ".hg", ".d", or ".i" */ - SPACE, - DEFAULT /* byte of a path component after the first */ -}; - -/* state machine for dir-encoding */ -enum dir_state { - DDOT, - DH, - DHGDI, - DDEFAULT -}; - -static inline int inset(const uint32_t bitset[], char c) -{ - return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31)); -} - -static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize, - char c) -{ - if (dest) { - assert(*destlen < destsize); - dest[*destlen] = c; - } - (*destlen)++; -} - -static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize, - const void *src, Py_ssize_t len) -{ - if (dest) { - assert(*destlen + len < destsize); - memcpy((void *)&dest[*destlen], src, len); - } - *destlen += len; -} - -static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize, - uint8_t c) -{ - static const char hexdigit[] = "0123456789abcdef"; - - charcopy(dest, destlen, destsize, hexdigit[c >> 4]); - charcopy(dest, destlen, destsize, hexdigit[c & 15]); -} - -/* 3-byte escape: tilde followed by two hex digits */ -static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize, - char c) -{ - charcopy(dest, destlen, destsize, '~'); - hexencode(dest, destlen, destsize, c); -} - -static Py_ssize_t _encodedir(char *dest, size_t destsize, - const char *src, Py_ssize_t len) -{ - enum dir_state state = DDEFAULT; - Py_ssize_t i = 0, destlen = 0; - - while (i < len) { - switch (state) { - case DDOT: - switch (src[i]) { - case 'd': - case 'i': - state = DHGDI; - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'h': - state = DH; - charcopy(dest, &destlen, destsize, src[i++]); - break; - default: - state = DDEFAULT; - break; - } - break; - case DH: - if (src[i] == 'g') { - state = DHGDI; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DDEFAULT; - break; - case DHGDI: - if (src[i] == '/') { - memcopy(dest, &destlen, destsize, ".hg", 3); - charcopy(dest, &destlen, destsize, src[i++]); - } - state = DDEFAULT; - break; - case DDEFAULT: - if (src[i] == '.') - state = DDOT; - charcopy(dest, &destlen, destsize, src[i++]); - break; - } - } - - return destlen; -} - -PyObject *encodedir(PyObject *self, PyObject *args) -{ - Py_ssize_t len, newlen; - PyObject *pathobj, *newobj; - char *path; - - if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) - return NULL; - - if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) { - PyErr_SetString(PyExc_TypeError, "expected a string"); - return NULL; - } - - newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1; - - if (newlen == len + 1) { - Py_INCREF(pathobj); - return pathobj; - } - - newobj = PyBytes_FromStringAndSize(NULL, newlen); - - if (newobj) { - assert(PyBytes_Check(newobj)); - Py_SIZE(newobj)--; - _encodedir(PyBytes_AS_STRING(newobj), newlen, path, - len + 1); - } - - return newobj; -} - -static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8], - char *dest, Py_ssize_t destlen, size_t destsize, - const char *src, Py_ssize_t len, - int encodedir) -{ - enum path_state state = START; - Py_ssize_t i = 0; - - /* - * Python strings end with a zero byte, which we use as a - * terminal token as they are not valid inside path names. - */ - - while (i < len) { - switch (state) { - case START: - switch (src[i]) { - case '/': - charcopy(dest, &destlen, destsize, src[i++]); - break; - case '.': - state = LDOT; - escape3(dest, &destlen, destsize, src[i++]); - break; - case ' ': - state = DEFAULT; - escape3(dest, &destlen, destsize, src[i++]); - break; - case 'a': - state = A; - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'c': - state = C; - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'l': - state = L; - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'n': - state = N; - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'p': - state = P; - charcopy(dest, &destlen, destsize, src[i++]); - break; - default: - state = DEFAULT; - break; - } - break; - case A: - if (src[i] == 'u') { - state = AU; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case AU: - if (src[i] == 'x') { - state = THIRD; - i++; - } - else state = DEFAULT; - break; - case THIRD: - state = DEFAULT; - switch (src[i]) { - case '.': - case '/': - case '\0': - escape3(dest, &destlen, destsize, src[i - 1]); - break; - default: - i--; - break; - } - break; - case C: - if (src[i] == 'o') { - state = CO; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case CO: - if (src[i] == 'm') { - state = COMLPT; - i++; - } - else if (src[i] == 'n') { - state = THIRD; - i++; - } - else state = DEFAULT; - break; - case COMLPT: - switch (src[i]) { - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - state = COMLPTn; - i++; - break; - default: - state = DEFAULT; - charcopy(dest, &destlen, destsize, src[i - 1]); - break; - } - break; - case COMLPTn: - state = DEFAULT; - switch (src[i]) { - case '.': - case '/': - case '\0': - escape3(dest, &destlen, destsize, src[i - 2]); - charcopy(dest, &destlen, destsize, src[i - 1]); - break; - default: - memcopy(dest, &destlen, destsize, - &src[i - 2], 2); - break; - } - break; - case L: - if (src[i] == 'p') { - state = LP; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case LP: - if (src[i] == 't') { - state = COMLPT; - i++; - } - else state = DEFAULT; - break; - case N: - if (src[i] == 'u') { - state = NU; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case NU: - if (src[i] == 'l') { - state = THIRD; - i++; - } - else state = DEFAULT; - break; - case P: - if (src[i] == 'r') { - state = PR; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case PR: - if (src[i] == 'n') { - state = THIRD; - i++; - } - else state = DEFAULT; - break; - case LDOT: - switch (src[i]) { - case 'd': - case 'i': - state = HGDI; - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'h': - state = H; - charcopy(dest, &destlen, destsize, src[i++]); - break; - default: - state = DEFAULT; - break; - } - break; - case DOT: - switch (src[i]) { - case '/': - case '\0': - state = START; - memcopy(dest, &destlen, destsize, "~2e", 3); - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'd': - case 'i': - state = HGDI; - charcopy(dest, &destlen, destsize, '.'); - charcopy(dest, &destlen, destsize, src[i++]); - break; - case 'h': - state = H; - memcopy(dest, &destlen, destsize, ".h", 2); - i++; - break; - default: - state = DEFAULT; - charcopy(dest, &destlen, destsize, '.'); - break; - } - break; - case H: - if (src[i] == 'g') { - state = HGDI; - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case HGDI: - if (src[i] == '/') { - state = START; - if (encodedir) - memcopy(dest, &destlen, destsize, ".hg", - 3); - charcopy(dest, &destlen, destsize, src[i++]); - } - else state = DEFAULT; - break; - case SPACE: - switch (src[i]) { - case '/': - case '\0': - state = START; - memcopy(dest, &destlen, destsize, "~20", 3); - charcopy(dest, &destlen, destsize, src[i++]); - break; - default: - state = DEFAULT; - charcopy(dest, &destlen, destsize, ' '); - break; - } - break; - case DEFAULT: - while (inset(onebyte, src[i])) { - charcopy(dest, &destlen, destsize, src[i++]); - if (i == len) - goto done; - } - switch (src[i]) { - case '.': - state = DOT; - i++; - break; - case ' ': - state = SPACE; - i++; - break; - case '/': - state = START; - charcopy(dest, &destlen, destsize, '/'); - i++; - break; - default: - if (inset(onebyte, src[i])) { - do { - charcopy(dest, &destlen, - destsize, src[i++]); - } while (i < len && - inset(onebyte, src[i])); - } - else if (inset(twobytes, src[i])) { - char c = src[i++]; - charcopy(dest, &destlen, destsize, '_'); - charcopy(dest, &destlen, destsize, - c == '_' ? '_' : c + 32); - } - else - escape3(dest, &destlen, destsize, - src[i++]); - break; - } - break; - } - } -done: - return destlen; -} - -static Py_ssize_t basicencode(char *dest, size_t destsize, - const char *src, Py_ssize_t len) -{ - static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe }; - - static const uint32_t onebyte[8] = { - 1, 0x2bff3bfa, 0x68000001, 0x2fffffff, - }; - - Py_ssize_t destlen = 0; - - return _encode(twobytes, onebyte, dest, destlen, destsize, - src, len, 1); -} - -static const Py_ssize_t maxstorepathlen = 120; - -static Py_ssize_t _lowerencode(char *dest, size_t destsize, - const char *src, Py_ssize_t len) -{ - static const uint32_t onebyte[8] = { - 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff - }; - - static const uint32_t lower[8] = { 0, 0, 0x7fffffe }; - - Py_ssize_t i, destlen = 0; - - for (i = 0; i < len; i++) { - if (inset(onebyte, src[i])) - charcopy(dest, &destlen, destsize, src[i]); - else if (inset(lower, src[i])) - charcopy(dest, &destlen, destsize, src[i] + 32); - else - escape3(dest, &destlen, destsize, src[i]); - } - - return destlen; -} - -PyObject *lowerencode(PyObject *self, PyObject *args) -{ - char *path; - Py_ssize_t len, newlen; - PyObject *ret; - - if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len)) - return NULL; - - newlen = _lowerencode(NULL, 0, path, len); - ret = PyBytes_FromStringAndSize(NULL, newlen); - if (ret) - _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len); - - return ret; -} - -/* See store.py:_auxencode for a description. */ -static Py_ssize_t auxencode(char *dest, size_t destsize, - const char *src, Py_ssize_t len) -{ - static const uint32_t twobytes[8]; - - static const uint32_t onebyte[8] = { - ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, - }; - - return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0); -} - -static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20]) -{ - static const Py_ssize_t dirprefixlen = 8; - static const Py_ssize_t maxshortdirslen = 68; - char *dest; - PyObject *ret; - - Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1; - Py_ssize_t destsize, destlen = 0, slop, used; - - while (lastslash >= 0 && src[lastslash] != '/') { - if (src[lastslash] == '.' && lastdot == -1) - lastdot = lastslash; - lastslash--; - } - -#if 0 - /* All paths should end in a suffix of ".i" or ".d". - Unfortunately, the file names in test-hybridencode.py - violate this rule. */ - if (lastdot != len - 3) { - PyErr_SetString(PyExc_ValueError, - "suffix missing or wrong length"); - return NULL; - } -#endif - - /* If src contains a suffix, we will append it to the end of - the new string, so make room. */ - destsize = 120; - if (lastdot >= 0) - destsize += len - lastdot - 1; - - ret = PyBytes_FromStringAndSize(NULL, destsize); - if (ret == NULL) - return NULL; - - dest = PyBytes_AS_STRING(ret); - memcopy(dest, &destlen, destsize, "dh/", 3); - - /* Copy up to dirprefixlen bytes of each path component, up to - a limit of maxshortdirslen bytes. */ - for (i = d = p = 0; i < lastslash; i++, p++) { - if (src[i] == '/') { - char d = dest[destlen - 1]; - /* After truncation, a directory name may end - in a space or dot, which are unportable. */ - if (d == '.' || d == ' ') - dest[destlen - 1] = '_'; - /* The + 3 is to account for "dh/" in the beginning */ - if (destlen > maxshortdirslen + 3) - break; - charcopy(dest, &destlen, destsize, src[i]); - p = -1; - } - else if (p < dirprefixlen) - charcopy(dest, &destlen, destsize, src[i]); - } - - /* Rewind to just before the last slash copied. */ - if (destlen > maxshortdirslen + 3) - do { - destlen--; - } while (destlen > 0 && dest[destlen] != '/'); - - if (destlen > 3) { - if (lastslash > 0) { - char d = dest[destlen - 1]; - /* The last directory component may be - truncated, so make it safe. */ - if (d == '.' || d == ' ') - dest[destlen - 1] = '_'; - } - - charcopy(dest, &destlen, destsize, '/'); - } - - /* Add a prefix of the original file's name. Its length - depends on the number of bytes left after accounting for - hash and suffix. */ - used = destlen + 40; - if (lastdot >= 0) - used += len - lastdot - 1; - slop = maxstorepathlen - used; - if (slop > 0) { - Py_ssize_t basenamelen = - lastslash >= 0 ? len - lastslash - 2 : len - 1; - - if (basenamelen > slop) - basenamelen = slop; - if (basenamelen > 0) - memcopy(dest, &destlen, destsize, &src[lastslash + 1], - basenamelen); - } - - /* Add hash and suffix. */ - for (i = 0; i < 20; i++) - hexencode(dest, &destlen, destsize, sha[i]); - - if (lastdot >= 0) - memcopy(dest, &destlen, destsize, &src[lastdot], - len - lastdot - 1); - - assert(PyBytes_Check(ret)); - Py_SIZE(ret) = destlen; - - return ret; -} - -/* - * Avoiding a trip through Python would improve performance by 50%, - * but we don't encounter enough long names to be worth the code. - */ -static int sha1hash(char hash[20], const char *str, Py_ssize_t len) -{ - static PyObject *shafunc; - PyObject *shaobj, *hashobj; - - if (shafunc == NULL) { - PyObject *hashlib, *name = PyBytes_FromString("hashlib"); - - if (name == NULL) - return -1; - - hashlib = PyImport_Import(name); - Py_DECREF(name); - - if (hashlib == NULL) { - PyErr_SetString(PyExc_ImportError, "hashlib"); - return -1; - } - shafunc = PyObject_GetAttrString(hashlib, "sha1"); - Py_DECREF(hashlib); - - if (shafunc == NULL) { - PyErr_SetString(PyExc_AttributeError, - "module 'hashlib' has no " - "attribute 'sha1'"); - return -1; - } - } - - shaobj = PyObject_CallFunction(shafunc, "s#", str, len); - - if (shaobj == NULL) - return -1; - - hashobj = PyObject_CallMethod(shaobj, "digest", ""); - Py_DECREF(shaobj); - if (hashobj == NULL) - return -1; - - if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) { - PyErr_SetString(PyExc_TypeError, - "result of digest is not a 20-byte hash"); - Py_DECREF(hashobj); - return -1; - } - - memcpy(hash, PyBytes_AS_STRING(hashobj), 20); - Py_DECREF(hashobj); - return 0; -} - -#define MAXENCODE 4096 * 4 - -static PyObject *hashencode(const char *src, Py_ssize_t len) -{ - char dired[MAXENCODE]; - char lowered[MAXENCODE]; - char auxed[MAXENCODE]; - Py_ssize_t dirlen, lowerlen, auxlen, baselen; - char sha[20]; - - baselen = (len - 5) * 3; - if (baselen >= MAXENCODE) { - PyErr_SetString(PyExc_ValueError, "string too long"); - return NULL; - } - - dirlen = _encodedir(dired, baselen, src, len); - if (sha1hash(sha, dired, dirlen - 1) == -1) - return NULL; - lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5); - auxlen = auxencode(auxed, baselen, lowered, lowerlen); - return hashmangle(auxed, auxlen, sha); -} - -PyObject *pathencode(PyObject *self, PyObject *args) -{ - Py_ssize_t len, newlen; - PyObject *pathobj, *newobj; - char *path; - - if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) - return NULL; - - if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) { - PyErr_SetString(PyExc_TypeError, "expected a string"); - return NULL; - } - - if (len > maxstorepathlen) - newlen = maxstorepathlen + 2; - else - newlen = len ? basicencode(NULL, 0, path, len + 1) : 1; - - if (newlen <= maxstorepathlen + 1) { - if (newlen == len + 1) { - Py_INCREF(pathobj); - return pathobj; - } - - newobj = PyBytes_FromStringAndSize(NULL, newlen); - - if (newobj) { - assert(PyBytes_Check(newobj)); - Py_SIZE(newobj)--; - basicencode(PyBytes_AS_STRING(newobj), newlen, path, - len + 1); - } - } - else - newobj = hashencode(path, len + 1); - - return newobj; -}
--- a/mercurial/pure/parsers.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/pure/parsers.py Sat Aug 13 12:23:56 2016 +0900 @@ -10,8 +10,8 @@ import struct import zlib -from .node import nullid -from . import pycompat +from ..node import nullid +from .. import pycompat stringio = pycompat.stringio
--- a/mercurial/revlog.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/revlog.py Sat Aug 13 12:23:56 2016 +0900 @@ -32,12 +32,14 @@ ancestor, error, mdiff, - parsers, + policy, pycompat, templatefilters, util, ) +parsers = policy.importmod(r'parsers') + _pack = struct.pack _unpack = struct.unpack # Aliased for performance.
--- a/mercurial/store.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/store.py Sat Aug 13 12:23:56 2016 +0900 @@ -15,12 +15,14 @@ from .i18n import _ from . import ( error, - parsers, + policy, pycompat, util, vfs as vfsmod, ) +parsers = policy.importmod(r'parsers') + # This avoids a collision between a file named foo and a dir named # foo.i or foo.d def _encodedir(path):
--- a/mercurial/util.py Sat Aug 13 12:18:58 2016 +0900 +++ b/mercurial/util.py Sat Aug 13 12:23:56 2016 +0900 @@ -45,13 +45,13 @@ encoding, error, i18n, - parsers, policy, pycompat, ) base85 = policy.importmod(r'base85') osutil = policy.importmod(r'osutil') +parsers = policy.importmod(r'parsers') b85decode = base85.b85decode b85encode = base85.b85encode
--- a/setup.py Sat Aug 13 12:18:58 2016 +0900 +++ b/setup.py Sat Aug 13 12:23:56 2016 +0900 @@ -635,10 +635,10 @@ 'mercurial/cext/mpatch.c'], include_dirs=common_include_dirs, depends=common_depends), - Extension('mercurial.parsers', ['mercurial/dirs.c', - 'mercurial/manifest.c', - 'mercurial/parsers.c', - 'mercurial/pathencode.c'], + Extension('mercurial.cext.parsers', ['mercurial/cext/dirs.c', + 'mercurial/cext/manifest.c', + 'mercurial/cext/parsers.c', + 'mercurial/cext/pathencode.c'], include_dirs=common_include_dirs, depends=common_depends), Extension('mercurial.cext.osutil', ['mercurial/cext/osutil.c'],
--- a/tests/fakedirstatewritetime.py Sat Aug 13 12:18:58 2016 +0900 +++ b/tests/fakedirstatewritetime.py Sat Aug 13 12:23:56 2016 +0900 @@ -11,10 +11,12 @@ context, dirstate, extensions, - parsers, + policy, util, ) +parsers = policy.importmod(r'parsers') + def pack_dirstate(fakenow, orig, dmap, copymap, pl, now): # execute what original parsers.pack_dirstate should do actually # for consistency
--- a/tests/test-parseindex2.py Sat Aug 13 12:18:58 2016 +0900 +++ b/tests/test-parseindex2.py Sat Aug 13 12:23:56 2016 +0900 @@ -14,9 +14,11 @@ nullrev, ) from mercurial import ( - parsers, + policy, ) +parsers = policy.importmod(r'parsers') + # original python implementation def gettype(q): return int(q & 0xFFFF) @@ -114,7 +116,7 @@ # of the currently-running Python interpreter, so we monkey-patch # sys.hexversion to simulate using different versions. code = ("import sys; sys.hexversion=%s; " - "import mercurial.parsers" % hexversion) + "import mercurial.cext.parsers" % hexversion) cmd = "python -c \"%s\"" % code # We need to do these tests inside a subprocess because parser.c's # version-checking code happens inside the module init function, and