copies: speed up copy detection
On some large repos, copy detection could spend > 10min using
fctx.ancestor() to determine if file revisions were actually related.
Because ancestor must traverse history to the root to determine the
GCA, it was doing a lot more work than necessary. With this
replacement, the same "status -r a:b" takes ~3 seconds.
--- a/mercurial/context.py Mon Dec 28 12:14:26 2009 +0900
+++ b/mercurial/context.py Tue Jan 19 22:20:05 2010 -0600
@@ -495,6 +495,17 @@
return None
+    def ancestors(self):
+        seen = set([str(self)])  # one-element set: set(str(..)) would be a set of chars
+        visit = [self]
+        while visit:
+            for parent in visit.pop(0).parents():
+                s = str(parent)
+                if s not in seen:
+                    visit.append(parent)
+                    seen.add(s)
+                    yield parent
+
class workingctx(changectx):
"""A workingctx object makes access to data related to
the current working directory convenient.
--- a/mercurial/copies.py Mon Dec 28 12:14:26 2009 +0900
+++ b/mercurial/copies.py Tue Jan 19 22:20:05 2010 -0600
@@ -28,27 +28,6 @@
f = _dirname(f)
return d
-def _findoldnames(fctx, limit):
- "find files that path was copied from, back to linkrev limit"
- old = {}
- seen = set()
- orig = fctx.path()
- visit = [(fctx, 0)]
- while visit:
- fc, depth = visit.pop()
- s = str(fc)
- if s in seen:
- continue
- seen.add(s)
- if fc.path() != orig and fc.path() not in old:
- old[fc.path()] = (depth, fc.path()) # remember depth
- if fc.rev() is not None and fc.rev() < limit:
- continue
- visit += [(p, depth - 1) for p in fc.parents()]
-
- # return old names sorted by depth
- return [o[1] for o in sorted(old.values())]
-
def _findlimit(repo, a, b):
"""Find the earliest revision that's an ancestor of a or b but not both,
None if no such revision exists.
@@ -138,23 +117,50 @@
fullcopy = {}
diverge = {}
+    def related(f1, f2, limit):
+        anc1, anc2 = f1.ancestors(), f2.ancestors()
+        try:
+            while True:
+                r1, r2 = f1.rev(), f2.rev()
+                if r1 > r2:
+                    f1 = anc1.next()
+                elif r2 > r1:
+                    f2 = anc2.next()
+                elif f1 == f2:
+                    return f1 # a match
+                elif r1 == r2 or r1 < limit or r2 < limit:
+                    return False # copy no longer relevant
+        except StopIteration:
+            return False
+
def checkcopies(f, m1, m2):
'''check possible copies of f from m1 to m2'''
- c1 = ctx(f, m1[f])
- for of in _findoldnames(c1, limit):
+ of = None
+ seen = set([f])
+ for oc in ctx(f, m1[f]).ancestors():
+ ocr = oc.rev()
+ of = oc.path()
+ if of in seen:
+ # check limit late - grab last rename before
+ if ocr < limit:
+ break
+ continue
+ seen.add(of)
+
fullcopy[f] = of # remember for dir rename detection
- if of in m2: # original file not in other manifest?
- # if the original file is unchanged on the other branch,
- # no merge needed
- if m2[of] != ma.get(of):
- c2 = ctx(of, m2[of])
- ca = c1.ancestor(c2)
- # related and named changed on only one side?
- if ca and (ca.path() == f or ca.path() == c2.path()):
- if c1 != ca or c2 != ca: # merge needed?
- copy[f] = of
- elif of in ma:
- diverge.setdefault(of, []).append(f)
+ if of not in m2:
+ continue # no match, keep looking
+ if m2[of] == ma.get(of):
+ break # no merge needed, quit early
+            c2 = ctx(of, m2[of])
+            cr = related(oc, c2, ca.rev())
+            if cr and (of == f or of == c2.path()): # related, non-divergent
+                copy[f] = of
+                of = None
+                break
+
+ if of in ma:
+ diverge.setdefault(of, []).append(f)
repo.ui.debug(" searching for copies back to rev %d\n" % limit)