Mercurial > hg
comparison mercurial/manifest.py @ 26871:1cbf144fd8a1
manifest: skip fastdelta if the change is large
In large repos, the existing manifest fastdelta computation (which performs a
bisect on the raw manifest for every file that is changing), is excessively
slow. This patch makes fastdelta fallback to the normal string delta algorithm
if the number of changes is large.
On a large repo with a commit of 8000 files, this reduces the commit time by 7
seconds (fastdelta goes from 8 seconds to 1).
I tested this change by modifying the function to compare the old and the new
values and running the test suite. The only difference is that the pure
text-diff algorithm sometimes produces smaller (but functionaly identical)
deltatexts than the bisect algorithm.
author | Durham Goode <durham@fb.com> |
---|---|
date | Thu, 05 Nov 2015 18:56:40 -0800 |
parents | 05871262acd5 |
children | 2a31433a59ba |
comparison
equal
deleted
inserted
replaced
26870:ab798d1a230f | 26871:1cbf144fd8a1 |
---|---|
332 dline = [""] | 332 dline = [""] |
333 start = 0 | 333 start = 0 |
334 # zero copy representation of base as a buffer | 334 # zero copy representation of base as a buffer |
335 addbuf = util.buffer(base) | 335 addbuf = util.buffer(base) |
336 | 336 |
337 # start with a readonly loop that finds the offset of | 337 changes = list(changes) |
338 # each line and creates the deltas | 338 if len(changes) < 1000: |
339 for f, todelete in changes: | 339 # start with a readonly loop that finds the offset of |
340 # bs will either be the index of the item or the insert point | 340 # each line and creates the deltas |
341 start, end = _msearch(addbuf, f, start) | 341 for f, todelete in changes: |
342 if not todelete: | 342 # bs will either be the index of the item or the insert point |
343 h, fl = self._lm[f] | 343 start, end = _msearch(addbuf, f, start) |
344 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) | 344 if not todelete: |
345 else: | 345 h, fl = self._lm[f] |
346 if start == end: | 346 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) |
347 # item we want to delete was not found, error out | 347 else: |
348 raise AssertionError( | 348 if start == end: |
349 _("failed to remove %s from manifest") % f) | 349 # item we want to delete was not found, error out |
350 l = "" | 350 raise AssertionError( |
351 if dstart is not None and dstart <= start and dend >= start: | 351 _("failed to remove %s from manifest") % f) |
352 if dend < end: | 352 l = "" |
353 if dstart is not None and dstart <= start and dend >= start: | |
354 if dend < end: | |
355 dend = end | |
356 if l: | |
357 dline.append(l) | |
358 else: | |
359 if dstart is not None: | |
360 delta.append([dstart, dend, "".join(dline)]) | |
361 dstart = start | |
353 dend = end | 362 dend = end |
354 if l: | 363 dline = [l] |
355 dline.append(l) | 364 |
356 else: | 365 if dstart is not None: |
357 if dstart is not None: | 366 delta.append([dstart, dend, "".join(dline)]) |
358 delta.append([dstart, dend, "".join(dline)]) | 367 # apply the delta to the base, and get a delta for addrevision |
359 dstart = start | 368 deltatext, arraytext = _addlistdelta(base, delta) |
360 dend = end | 369 else: |
361 dline = [l] | 370 # For large changes, it's much cheaper to just build the text and |
362 | 371 # diff it. |
363 if dstart is not None: | 372 arraytext = array.array('c', self.text()) |
364 delta.append([dstart, dend, "".join(dline)]) | 373 deltatext = mdiff.textdiff(base, arraytext) |
365 # apply the delta to the base, and get a delta for addrevision | 374 |
366 deltatext, arraytext = _addlistdelta(base, delta) | |
367 return arraytext, deltatext | 375 return arraytext, deltatext |
368 | 376 |
369 def _msearch(m, s, lo=0, hi=None): | 377 def _msearch(m, s, lo=0, hi=None): |
370 '''return a tuple (start, end) that says where to find s within m. | 378 '''return a tuple (start, end) that says where to find s within m. |
371 | 379 |