changeset 18902:8c0a7eeda06d

dirs: use mutable strings internally

perfdirs results for a working dir with 170,000 files:

  Python      638 msec
  C           244
  C+int       192
  C+int+str   168

In the large repo above, the nearly 0.5 second time improvement is visible in commands like "hg add" and "hg update".

  hg add
    Python      1100 msec
    C+int+str    600

  hg update (with nothing to do)
    Python      2800 msec
    C+int+str   2240
author Bryan O'Sullivan <bryano@fb.com>
date Wed, 10 Apr 2013 15:08:28 -0700
parents 66d3aebe2d95
children 5df602551eea
files mercurial/dirs.c
diffstat 1 files changed, 18 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/dirs.c	Wed Apr 10 15:08:27 2013 -0700
+++ b/mercurial/dirs.c	Wed Apr 10 15:08:28 2013 -0700
@@ -19,6 +19,9 @@
  *
  * We modify Python integers for refcounting, but those integers are
  * never visible to Python code.
+ *
+ * We mutate strings in-place, but leave them immutable once they can
+ * be seen by Python code.
  */
 typedef struct {
 	PyObject_HEAD
@@ -40,6 +43,7 @@
 
 static int _addpath(PyObject *dirs, PyObject *path)
 {
+	const char *cpath = PyString_AS_STRING(path);
 	Py_ssize_t pos = PyString_GET_SIZE(path);
 	PyObject *key = NULL;
 	int ret = -1;
@@ -47,15 +51,24 @@
 	while ((pos = _finddir(path, pos - 1)) != -1) {
 		PyObject *val;
 
-		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
-
-		if (key == NULL)
-			goto bail;
+		/* It's likely that every prefix already has an entry
+		   in our dict. Try to avoid allocating and
+		   deallocating a string for each prefix we check. */
+		if (key != NULL)
+			((PyStringObject *)key)->ob_shash = -1;
+		else {
+			/* Force Python to not reuse a small shared string. */
+			key = PyString_FromStringAndSize(cpath,
+							 pos < 2 ? 2 : pos);
+			if (key == NULL)
+				goto bail;
+		}
+		PyString_GET_SIZE(key) = pos;
+		PyString_AS_STRING(key)[pos] = '\0';
 
 		val = PyDict_GetItem(dirs, key);
 		if (val != NULL) {
 			PyInt_AS_LONG(val) += 1;
-			Py_CLEAR(key);
 			continue;
 		}