# HG changeset patch # User Siddharth Agarwal # Date 1427443386 25200 # Node ID 1a9efc312700bb3657a610d6bd1170c2af0b6c76 # Parent 914caae9a86a7b8a2409162674d06e3921c39e08 dirs.addpath: rework algorithm to search forward This improves performance because it uses strchr rather than a loop. For LLVM/clang version "Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)" on OS X, for a repo with over 200,000 files, this improves perfdirs from 0.248 seconds to 0.230 (7.3%) For gcc 4.4.6 on Linux, for a test repo with over 500,000 files, this improves perfdirs from 0.704 seconds to 0.658 (6.5%). diff -r 914caae9a86a -r 1a9efc312700 mercurial/dirs.c --- a/mercurial/dirs.c Sat Mar 14 17:40:47 2015 +0900 +++ b/mercurial/dirs.c Fri Mar 27 01:03:06 2015 -0700 @@ -9,6 +9,7 @@ #define PY_SSIZE_T_CLEAN #include +#include #include "util.h" /* @@ -32,23 +33,19 @@ { const char *s = PyString_AS_STRING(path); - while (pos != -1) { - if (s[pos] == '/') - break; - pos -= 1; - } - - return pos; + const char *ret = strchr(s + pos, '/'); + return (ret != NULL) ? (ret - s) : -1; } static int _addpath(PyObject *dirs, PyObject *path) { - const char *cpath = PyString_AS_STRING(path); - Py_ssize_t pos = PyString_GET_SIZE(path); + char *cpath = PyString_AS_STRING(path); + Py_ssize_t len = PyString_GET_SIZE(path); + Py_ssize_t pos = -1; PyObject *key = NULL; int ret = -1; - while ((pos = _finddir(path, pos - 1)) != -1) { + while ((pos = _finddir(path, pos + 1)) != -1) { PyObject *val; /* It's likely that every prefix already has an entry @@ -56,10 +53,18 @@ deallocating a string for each prefix we check. */ if (key != NULL) ((PyStringObject *)key)->ob_shash = -1; - else { - /* Force Python to not reuse a small shared string. */ - key = PyString_FromStringAndSize(cpath, - pos < 2 ? 2 : pos); + else if (pos != 0) { + /* pos >= 1, which means that len >= 2. This is + guaranteed to produce a non-interned string. */ + key = PyString_FromStringAndSize(cpath, len); + if (key == NULL) + goto bail; + } else { + /* pos == 0, which means we need to increment the dir + count for the empty string. We need to make sure we + don't muck around with interned strings, so throw it + away later. */ + key = PyString_FromString(""); if (key == NULL) goto bail; } @@ -69,6 +74,10 @@ val = PyDict_GetItem(dirs, key); if (val != NULL) { PyInt_AS_LONG(val) += 1; + if (pos != 0) + PyString_AS_STRING(key)[pos] = '/'; + else + key = NULL; continue; } @@ -83,6 +92,11 @@ Py_DECREF(val); if (ret == -1) goto bail; + + if (pos != 0) + PyString_AS_STRING(key)[pos] = '/'; + else + key = NULL; Py_CLEAR(key); } ret = 0; @@ -95,11 +109,11 @@ static int _delpath(PyObject *dirs, PyObject *path) { - Py_ssize_t pos = PyString_GET_SIZE(path); + Py_ssize_t pos = -1; PyObject *key = NULL; int ret = -1; - while ((pos = _finddir(path, pos - 1)) != -1) { + while ((pos = _finddir(path, pos + 1)) != -1) { PyObject *val; key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);