comparison mercurial/manifest.c @ 24228:542c891274b2

lazymanifest: use a binary search to do an insertion This makes insertions log(n) plus some memmove() overhead, rather than doing an append followed by an n*log(n) sort. There's probably a lot of performance to be gained by adding a batch-add method, which could be smarter about the memmove()s performed. Includes a couple of extra tests that should help prevent bugs. Thanks to Martin for some significant pre-mail cleanup of this change.
author Augie Fackler <augie@google.com>
date Fri, 30 Jan 2015 21:30:40 -0800
parents a5f1bccd2996
children 40528ad1b1e8
comparison
equal deleted inserted replaced
24227:8ec2df32bd39 24228:542c891274b2
361 hit->deleted = true; 361 hit->deleted = true;
362 self->livelines--; 362 self->livelines--;
363 return 0; 363 return 0;
364 } 364 }
365 365
366 /* Do a binary search for the insertion point for new, creating the
367 * new entry if needed. */
368 static int internalsetitem(lazymanifest *self, line *new) {
369 int start = 0, end = self->numlines;
370 while (start < end) {
371 int pos = start + (end - start) / 2;
372 int c = linecmp(new, self->lines + pos);
373 if (c < 0)
374 end = pos;
375 else if (c > 0)
376 start = pos + 1;
377 else {
378 if (self->lines[pos].deleted)
379 self->livelines++;
380 start = pos;
381 goto finish;
382 }
383 }
384 /* being here means we need to do an insert */
385 if (!realloc_if_full(self)) {
386 PyErr_NoMemory();
387 return -1;
388 }
389 memmove(self->lines + start + 1, self->lines + start,
390 (self->numlines - start) * sizeof(line));
391 self->numlines++;
392 self->livelines++;
393 finish:
394 self->lines[start] = *new;
395 self->dirty = true;
396 return 0;
397 }
398
366 static int lazymanifest_setitem( 399 static int lazymanifest_setitem(
367 lazymanifest *self, PyObject *key, PyObject *value) 400 lazymanifest *self, PyObject *key, PyObject *value)
368 { 401 {
369 char *path; 402 char *path;
370 Py_ssize_t plen; 403 Py_ssize_t plen;
376 Py_ssize_t flen; 409 Py_ssize_t flen;
377 size_t dlen; 410 size_t dlen;
378 char *dest; 411 char *dest;
379 int i; 412 int i;
380 line new; 413 line new;
381 line *hit;
382 if (!PyString_Check(key)) { 414 if (!PyString_Check(key)) {
383 PyErr_Format(PyExc_TypeError, 415 PyErr_Format(PyExc_TypeError,
384 "setitem: manifest keys must be a string."); 416 "setitem: manifest keys must be a string.");
385 return -1; 417 return -1;
386 } 418 }
444 if (hlen > 20) { 476 if (hlen > 20) {
445 new.hash_suffix = hash[20]; 477 new.hash_suffix = hash[20];
446 } 478 }
447 new.from_malloc = true; /* is `start` a pointer we allocated? */ 479 new.from_malloc = true; /* is `start` a pointer we allocated? */
448 new.deleted = false; /* is this entry deleted? */ 480 new.deleted = false; /* is this entry deleted? */
449 hit = bsearch(&new, self->lines, self->numlines, 481 if (internalsetitem(self, &new)) {
450 sizeof(line), &linecmp); 482 return -1;
451 self->dirty = true;
452 if (hit) {
453 /* updating a line we already had */
454 if (hit->from_malloc) {
455 free(hit->start);
456 }
457 if (hit->deleted) {
458 self->livelines++;
459 }
460 *hit = new;
461 } else {
462 /* new line */
463 if (!realloc_if_full(self)) {
464 PyErr_NoMemory();
465 return -1;
466 }
467 self->lines[self->numlines++] = new;
468 self->livelines++;
469 /* TODO: do a binary search and insert rather than
470 * append and qsort. Also offer a batch-insert
471 * interface, which should be a nice little
472 * performance win.
473 */
474 qsort(self->lines, self->numlines, sizeof(line), &linecmp);
475 } 483 }
476 return 0; 484 return 0;
477 } 485 }
478 486
479 static PyMappingMethods lazymanifest_mapping_methods = { 487 static PyMappingMethods lazymanifest_mapping_methods = {