lazymanifest: use a binary search to do an insertion
This makes insertions log(n) plus some memmove() overhead, rather than
doing an append followed by an n*log(n) sort. There's probably a lot
of performance to be gained by adding a batch-add method, which could
be smarter about the memmove()s performed.
Includes a couple of extra tests that should help prevent bugs.
Thanks to Martin for some significant pre-mail cleanup of this change.
--- a/mercurial/manifest.c Mon Nov 17 00:00:25 2014 -0500
+++ b/mercurial/manifest.c Fri Jan 30 21:30:40 2015 -0800
@@ -363,6 +363,39 @@
return 0;
}
+/* Do a binary search for the insertion point for new, creating the
+ * new entry if needed. */
+static int internalsetitem(lazymanifest *self, line *new) {
+ int start = 0, end = self->numlines;
+ while (start < end) {
+ int pos = start + (end - start) / 2;
+ int c = linecmp(new, self->lines + pos);
+ if (c < 0)
+ end = pos;
+ else if (c > 0)
+ start = pos + 1;
+ else {
+ if (self->lines[pos].deleted)
+ self->livelines++;
+ start = pos;
+ goto finish;
+ }
+ }
+ /* being here means we need to do an insert */
+ if (!realloc_if_full(self)) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ memmove(self->lines + start + 1, self->lines + start,
+ (self->numlines - start) * sizeof(line));
+ self->numlines++;
+ self->livelines++;
+finish:
+ self->lines[start] = *new;
+ self->dirty = true;
+ return 0;
+}
+
static int lazymanifest_setitem(
lazymanifest *self, PyObject *key, PyObject *value)
{
@@ -378,7 +411,6 @@
char *dest;
int i;
line new;
- line *hit;
if (!PyString_Check(key)) {
PyErr_Format(PyExc_TypeError,
"setitem: manifest keys must be a string.");
@@ -446,32 +478,8 @@
}
new.from_malloc = true; /* is `start` a pointer we allocated? */
new.deleted = false; /* is this entry deleted? */
- hit = bsearch(&new, self->lines, self->numlines,
- sizeof(line), &linecmp);
- self->dirty = true;
- if (hit) {
- /* updating a line we already had */
- if (hit->from_malloc) {
- free(hit->start);
- }
- if (hit->deleted) {
- self->livelines++;
- }
- *hit = new;
- } else {
- /* new line */
- if (!realloc_if_full(self)) {
- PyErr_NoMemory();
- return -1;
- }
- self->lines[self->numlines++] = new;
- self->livelines++;
- /* TODO: do a binary search and insert rather than
- * append and qsort. Also offer a batch-insert
- * interface, which should be a nice little
- * performance win.
- */
- qsort(self->lines, self->numlines, sizeof(line), &linecmp);
+ if (internalsetitem(self, &new)) {
+ return -1;
}
return 0;
}
--- a/tests/test-manifest.py Mon Nov 17 00:00:25 2014 -0500
+++ b/tests/test-manifest.py Fri Jan 30 21:30:40 2015 -0800
@@ -133,6 +133,10 @@
self.assertRaises(KeyError, lambda : m['foo'])
self.assertEqual(1, len(m))
self.assertEqual(1, len(list(m)))
+ # now restore and make sure everything works right
+ m['foo'] = 'a' * 20, ''
+ self.assertEqual(2, len(m))
+ self.assertEqual(2, len(list(m)))
def testManifestDiff(self):
MISSING = (None, '')