changeset 26871:1cbf144fd8a1

manifest: skip fastdelta if the change is large In large repos, the existing manifest fastdelta computation (which performs a bisect on the raw manifest for every file that is changing), is excessively slow. This patch makes fastdelta fallback to the normal string delta algorithm if the number of changes is large. On a large repo with a commit of 8000 files, this reduces the commit time by 7 seconds (fastdelta goes from 8 seconds to 1). I tested this change by modifying the function to compare the old and the new values and running the test suite. The only difference is that the pure text-diff algorithm sometimes produces smaller (but functionaly identical) deltatexts than the bisect algorithm.
author Durham Goode <durham@fb.com>
date Thu, 05 Nov 2015 18:56:40 -0800
parents ab798d1a230f
children 853154f27525
files mercurial/manifest.py
diffstat 1 files changed, 36 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/manifest.py	Wed Nov 04 23:44:51 2015 -0800
+++ b/mercurial/manifest.py	Thu Nov 05 18:56:40 2015 -0800
@@ -334,36 +334,44 @@
         # zero copy representation of base as a buffer
         addbuf = util.buffer(base)
 
-        # start with a readonly loop that finds the offset of
-        # each line and creates the deltas
-        for f, todelete in changes:
-            # bs will either be the index of the item or the insert point
-            start, end = _msearch(addbuf, f, start)
-            if not todelete:
-                h, fl = self._lm[f]
-                l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
-            else:
-                if start == end:
-                    # item we want to delete was not found, error out
-                    raise AssertionError(
-                            _("failed to remove %s from manifest") % f)
-                l = ""
-            if dstart is not None and dstart <= start and dend >= start:
-                if dend < end:
+        changes = list(changes)
+        if len(changes) < 1000:
+            # start with a readonly loop that finds the offset of
+            # each line and creates the deltas
+            for f, todelete in changes:
+                # bs will either be the index of the item or the insert point
+                start, end = _msearch(addbuf, f, start)
+                if not todelete:
+                    h, fl = self._lm[f]
+                    l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
+                else:
+                    if start == end:
+                        # item we want to delete was not found, error out
+                        raise AssertionError(
+                                _("failed to remove %s from manifest") % f)
+                    l = ""
+                if dstart is not None and dstart <= start and dend >= start:
+                    if dend < end:
+                        dend = end
+                    if l:
+                        dline.append(l)
+                else:
+                    if dstart is not None:
+                        delta.append([dstart, dend, "".join(dline)])
+                    dstart = start
                     dend = end
-                if l:
-                    dline.append(l)
-            else:
-                if dstart is not None:
-                    delta.append([dstart, dend, "".join(dline)])
-                dstart = start
-                dend = end
-                dline = [l]
+                    dline = [l]
 
-        if dstart is not None:
-            delta.append([dstart, dend, "".join(dline)])
-        # apply the delta to the base, and get a delta for addrevision
-        deltatext, arraytext = _addlistdelta(base, delta)
+            if dstart is not None:
+                delta.append([dstart, dend, "".join(dline)])
+            # apply the delta to the base, and get a delta for addrevision
+            deltatext, arraytext = _addlistdelta(base, delta)
+        else:
+            # For large changes, it's much cheaper to just build the text and
+            # diff it.
+            arraytext = array.array('c', self.text())
+            deltatext = mdiff.textdiff(base, arraytext)
+
         return arraytext, deltatext
 
 def _msearch(m, s, lo=0, hi=None):