changeset 10262:eb243551cbd8 stable

copies: speed up copy detection On some large repos, copy detection could spend > 10min using fctx.ancestor() to determine if file revisions were actually related. Because ancestor must traverse history to the root to determine the GCA, it was doing a lot more work than necessary. With this replacement, same status -r a:b takes ~3 seconds.
author Matt Mackall <mpm@selenic.com>
date Tue, 19 Jan 2010 22:20:05 -0600
parents 5eae671c0b57
children 25e572394f5c
files mercurial/context.py mercurial/copies.py
diffstat 2 files changed, 52 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/context.py	Mon Dec 28 12:14:26 2009 +0900
+++ b/mercurial/context.py	Tue Jan 19 22:20:05 2010 -0600
@@ -495,6 +495,17 @@
 
         return None
 
+    def ancestors(self):
+        seen = set(str(self))
+        visit = [self]
+        while visit:
+            for parent in visit.pop(0).parents():
+                s = str(parent)
+                if s not in seen:
+                    visit.append(parent)
+                    seen.add(s)
+                    yield parent
+
 class workingctx(changectx):
     """A workingctx object makes access to data related to
     the current working directory convenient.
--- a/mercurial/copies.py	Mon Dec 28 12:14:26 2009 +0900
+++ b/mercurial/copies.py	Tue Jan 19 22:20:05 2010 -0600
@@ -28,27 +28,6 @@
             f = _dirname(f)
     return d
 
-def _findoldnames(fctx, limit):
-    "find files that path was copied from, back to linkrev limit"
-    old = {}
-    seen = set()
-    orig = fctx.path()
-    visit = [(fctx, 0)]
-    while visit:
-        fc, depth = visit.pop()
-        s = str(fc)
-        if s in seen:
-            continue
-        seen.add(s)
-        if fc.path() != orig and fc.path() not in old:
-            old[fc.path()] = (depth, fc.path()) # remember depth
-        if fc.rev() is not None and fc.rev() < limit:
-            continue
-        visit += [(p, depth - 1) for p in fc.parents()]
-
-    # return old names sorted by depth
-    return [o[1] for o in sorted(old.values())]
-
 def _findlimit(repo, a, b):
     """Find the earliest revision that's an ancestor of a or b but not both,
     None if no such revision exists.
@@ -138,23 +117,50 @@
     fullcopy = {}
     diverge = {}
 
+    def related(f1, f2, limit):
+        g1, g2 = f1.ancestors(), f2.ancestors()
+        try:
+            while 1:
+                f1r, f2r = f1.rev(), f2.rev()
+                if f1r > f2r:
+                    f1 = g1.next()
+                elif f2r > f1r:
+                    f2 = g2.next()
+                elif f1 == f2:
+                    return f1 # a match
+                elif f1r == f2r or f1r < limit or f2r < limit:
+                    return False # copy no longer relevant
+        except StopIteration:
+            return False
+
     def checkcopies(f, m1, m2):
         '''check possible copies of f from m1 to m2'''
-        c1 = ctx(f, m1[f])
-        for of in _findoldnames(c1, limit):
+        of = None
+        seen = set([f])
+        for oc in ctx(f, m1[f]).ancestors():
+            ocr = oc.rev()
+            of = oc.path()
+            if of in seen:
+                # check limit late - grab last rename before
+                if ocr < limit:
+                    break
+                continue
+            seen.add(of)
+
             fullcopy[f] = of # remember for dir rename detection
-            if of in m2: # original file not in other manifest?
-                # if the original file is unchanged on the other branch,
-                # no merge needed
-                if m2[of] != ma.get(of):
-                    c2 = ctx(of, m2[of])
-                    ca = c1.ancestor(c2)
-                    # related and named changed on only one side?
-                    if ca and (ca.path() == f or ca.path() == c2.path()):
-                        if c1 != ca or c2 != ca: # merge needed?
-                            copy[f] = of
-            elif of in ma:
-                diverge.setdefault(of, []).append(f)
+            if of not in m2:
+                continue # no match, keep looking
+            if m2[of] == ma.get(of):
+                break # no merge needed, quit early
+            c2 = ctx(of, m2[of])
+            cr = related(oc, c2, ca.rev())
+            if of == f or of == c2.path(): # non-divergent
+                copy[f] = of
+                of = None
+                break
+
+        if of in ma:
+            diverge.setdefault(of, []).append(f)
 
     repo.ui.debug("  searching for copies back to rev %d\n" % limit)