# HG changeset patch # User Augie Fackler # Date 1422682240 28800 # Node ID 542c891274b22bda5ce1cbc6b2b8a8caab9bfd48 # Parent 8ec2df32bd39aa74d77d6b094b7c15ba5236ed81 lazymanifest: use a binary search to do an insertion This makes insertions log(n) plus some memmove() overhead, rather than doing an append followed by an n*log(n) sort. There's probably a lot of performance to be gained by adding a batch-add method, which could be smarter about the memmove()s performed. Includes a couple of extra tests that should help prevent bugs. Thanks to Martin for some significant pre-mail cleanup of this change. diff -r 8ec2df32bd39 -r 542c891274b2 mercurial/manifest.c --- a/mercurial/manifest.c Mon Nov 17 00:00:25 2014 -0500 +++ b/mercurial/manifest.c Fri Jan 30 21:30:40 2015 -0800 @@ -363,6 +363,39 @@ return 0; } +/* Do a binary search for the insertion point for new, creating the + * new entry if needed. */ +static int internalsetitem(lazymanifest *self, line *new) { + int start = 0, end = self->numlines; + while (start < end) { + int pos = start + (end - start) / 2; + int c = linecmp(new, self->lines + pos); + if (c < 0) + end = pos; + else if (c > 0) + start = pos + 1; + else { + if (self->lines[pos].deleted) + self->livelines++; + start = pos; + goto finish; + } + } + /* being here means we need to do an insert */ + if (!realloc_if_full(self)) { + PyErr_NoMemory(); + return -1; + } + memmove(self->lines + start + 1, self->lines + start, + (self->numlines - start) * sizeof(line)); + self->numlines++; + self->livelines++; +finish: + self->lines[start] = *new; + self->dirty = true; + return 0; +} + static int lazymanifest_setitem( lazymanifest *self, PyObject *key, PyObject *value) { @@ -378,7 +411,6 @@ char *dest; int i; line new; - line *hit; if (!PyString_Check(key)) { PyErr_Format(PyExc_TypeError, "setitem: manifest keys must be a string."); @@ -446,32 +478,8 @@ } new.from_malloc = true; /* is `start` a pointer we allocated? */ new.deleted = false; /* is this entry deleted? */ - hit = bsearch(&new, self->lines, self->numlines, - sizeof(line), &linecmp); - self->dirty = true; - if (hit) { - /* updating a line we already had */ - if (hit->from_malloc) { - free(hit->start); - } - if (hit->deleted) { - self->livelines++; - } - *hit = new; - } else { - /* new line */ - if (!realloc_if_full(self)) { - PyErr_NoMemory(); - return -1; - } - self->lines[self->numlines++] = new; - self->livelines++; - /* TODO: do a binary search and insert rather than - * append and qsort. Also offer a batch-insert - * interface, which should be a nice little - * performance win. - */ - qsort(self->lines, self->numlines, sizeof(line), &linecmp); + if (internalsetitem(self, &new)) { + return -1; } return 0; } diff -r 8ec2df32bd39 -r 542c891274b2 tests/test-manifest.py --- a/tests/test-manifest.py Mon Nov 17 00:00:25 2014 -0500 +++ b/tests/test-manifest.py Fri Jan 30 21:30:40 2015 -0800 @@ -133,6 +133,10 @@ self.assertRaises(KeyError, lambda : m['foo']) self.assertEqual(1, len(m)) self.assertEqual(1, len(list(m))) + # now restore and make sure everything works right + m['foo'] = 'a' * 20, '' + self.assertEqual(2, len(m)) + self.assertEqual(2, len(list(m))) def testManifestDiff(self): MISSING = (None, '')