comparison mercurial/manifest.py @ 26871:1cbf144fd8a1

manifest: skip fastdelta if the change is large In large repos, the existing manifest fastdelta computation (which performs a bisect on the raw manifest for every file that is changing), is excessively slow. This patch makes fastdelta fallback to the normal string delta algorithm if the number of changes is large. On a large repo with a commit of 8000 files, this reduces the commit time by 7 seconds (fastdelta goes from 8 seconds to 1). I tested this change by modifying the function to compare the old and the new values and running the test suite. The only difference is that the pure text-diff algorithm sometimes produces smaller (but functionaly identical) deltatexts than the bisect algorithm.
author Durham Goode <durham@fb.com>
date Thu, 05 Nov 2015 18:56:40 -0800
parents 05871262acd5
children 2a31433a59ba
comparison
equal deleted inserted replaced
26870:ab798d1a230f 26871:1cbf144fd8a1
332 dline = [""] 332 dline = [""]
333 start = 0 333 start = 0
334 # zero copy representation of base as a buffer 334 # zero copy representation of base as a buffer
335 addbuf = util.buffer(base) 335 addbuf = util.buffer(base)
336 336
337 # start with a readonly loop that finds the offset of 337 changes = list(changes)
338 # each line and creates the deltas 338 if len(changes) < 1000:
339 for f, todelete in changes: 339 # start with a readonly loop that finds the offset of
340 # bs will either be the index of the item or the insert point 340 # each line and creates the deltas
341 start, end = _msearch(addbuf, f, start) 341 for f, todelete in changes:
342 if not todelete: 342 # bs will either be the index of the item or the insert point
343 h, fl = self._lm[f] 343 start, end = _msearch(addbuf, f, start)
344 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) 344 if not todelete:
345 else: 345 h, fl = self._lm[f]
346 if start == end: 346 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
347 # item we want to delete was not found, error out 347 else:
348 raise AssertionError( 348 if start == end:
349 _("failed to remove %s from manifest") % f) 349 # item we want to delete was not found, error out
350 l = "" 350 raise AssertionError(
351 if dstart is not None and dstart <= start and dend >= start: 351 _("failed to remove %s from manifest") % f)
352 if dend < end: 352 l = ""
353 if dstart is not None and dstart <= start and dend >= start:
354 if dend < end:
355 dend = end
356 if l:
357 dline.append(l)
358 else:
359 if dstart is not None:
360 delta.append([dstart, dend, "".join(dline)])
361 dstart = start
353 dend = end 362 dend = end
354 if l: 363 dline = [l]
355 dline.append(l) 364
356 else: 365 if dstart is not None:
357 if dstart is not None: 366 delta.append([dstart, dend, "".join(dline)])
358 delta.append([dstart, dend, "".join(dline)]) 367 # apply the delta to the base, and get a delta for addrevision
359 dstart = start 368 deltatext, arraytext = _addlistdelta(base, delta)
360 dend = end 369 else:
361 dline = [l] 370 # For large changes, it's much cheaper to just build the text and
362 371 # diff it.
363 if dstart is not None: 372 arraytext = array.array('c', self.text())
364 delta.append([dstart, dend, "".join(dline)]) 373 deltatext = mdiff.textdiff(base, arraytext)
365 # apply the delta to the base, and get a delta for addrevision 374
366 deltatext, arraytext = _addlistdelta(base, delta)
367 return arraytext, deltatext 375 return arraytext, deltatext
368 376
369 def _msearch(m, s, lo=0, hi=None): 377 def _msearch(m, s, lo=0, hi=None):
370 '''return a tuple (start, end) that says where to find s within m. 378 '''return a tuple (start, end) that says where to find s within m.
371 379