# HG changeset patch
# User Bryan O'Sullivan
# Date 1365631708 25200
# Node ID 8c0a7eeda06d2773ec92b14527280db3e0167588
# Parent 66d3aebe2d95a4380e6eac434bedc7de09426627
dirs: use mutable strings internally

perfdirs results for a working dir with 170,000 files:

  Python      638 msec
  C           244
  C+int       192
  C+int+str   168

In the large repo above, the nearly 0.5 second time improvement is
visible in commands like "hg add" and "hg update".

  hg add
  Python      1100 msec
  C+int+str   600

  hg update (with nothing to do)
  Python      2800 msec
  C+int+str   2240

diff -r 66d3aebe2d95 -r 8c0a7eeda06d mercurial/dirs.c
--- a/mercurial/dirs.c	Wed Apr 10 15:08:27 2013 -0700
+++ b/mercurial/dirs.c	Wed Apr 10 15:08:28 2013 -0700
@@ -19,6 +19,9 @@
  *
  * We modify Python integers for refcounting, but those integers are
  * never visible to Python code.
+ *
+ * We mutate strings in-place, but leave them immutable once they can
+ * be seen by Python code.
  */
 typedef struct {
 	PyObject_HEAD
@@ -40,6 +43,7 @@
 
 static int _addpath(PyObject *dirs, PyObject *path)
 {
+	const char *cpath = PyString_AS_STRING(path);
 	Py_ssize_t pos = PyString_GET_SIZE(path);
 	PyObject *key = NULL;
 	int ret = -1;
@@ -47,15 +51,24 @@
 	while ((pos = _finddir(path, pos - 1)) != -1) {
 		PyObject *val;
 
-		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
-
-		if (key == NULL)
-			goto bail;
+		/* It's likely that every prefix already has an entry
+		   in our dict. Try to avoid allocating and
+		   deallocating a string for each prefix we check. */
+		if (key != NULL)
+			((PyStringObject *)key)->ob_shash = -1;
+		else {
+			/* Force Python to not reuse a small shared string. */
+			key = PyString_FromStringAndSize(cpath,
+							 pos < 2 ? 2 : pos);
+			if (key == NULL)
+				goto bail;
+		}
+		PyString_GET_SIZE(key) = pos;
+		PyString_AS_STRING(key)[pos] = '\0';
 
 		val = PyDict_GetItem(dirs, key);
 		if (val != NULL) {
 			PyInt_AS_LONG(val) += 1;
-			Py_CLEAR(key);
 			continue;
 		}
 