copies: move findcopies code to its own module
authorMatt Mackall <mpm@selenic.com>
Sat, 15 Mar 2008 10:02:31 -0500
changeset 6274 f3f383efbeae
parent 6273 20aa460a52b6
child 6275 fda369b5779c
copies: move findcopies code to its own module - pass in contexts - fold symmetricdifference check into copies.copies
mercurial/copies.py
mercurial/merge.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/copies.py	Sat Mar 15 10:02:31 2008 -0500
@@ -0,0 +1,193 @@
+# copies.py - copy detection for Mercurial
+#
+# Copyright 2008 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+from node import nullid, nullrev
+from i18n import _
+import util, ancestor
+
+def _nonoverlap(d1, d2, d3):
+    "Return list of elements in d1 not in d2 or d3"
+    l = [d for d in d1 if d not in d3 and d not in d2]
+    l.sort()
+    return l
+
+def _dirname(f):
+    s = f.rfind("/")
+    if s == -1:
+        return ""
+    return f[:s]
+
+def _dirs(files):
+    d = {}
+    for f in files:
+        f = _dirname(f)
+        while f not in d:
+            d[f] = True
+            f = _dirname(f)
+    return d
+
+def _findoldnames(fctx, limit):
+    "find files that path was copied from, back to linkrev limit"
+    old = {}
+    seen = {}
+    orig = fctx.path()
+    visit = [fctx]
+    while visit:
+        fc = visit.pop()
+        s = str(fc)
+        if s in seen:
+            continue
+        seen[s] = 1
+        if fc.path() != orig and fc.path() not in old:
+            old[fc.path()] = 1
+        if fc.rev() < limit and fc.rev() is not None:
+            continue
+        visit += fc.parents()
+
+    old = old.keys()
+    old.sort()
+    return old
+
+def copies(repo, c1, c2, ca):
+    """
+    Find moves and copies between context c1 and c2
+    """
+    # avoid silly behavior for update from empty dir
+    if not c1 or not c2 or not ca:
+        return {}, {}
+
+    rev1, rev2 = c1.rev(), c2.rev()
+    if rev1 is None: # c1 is a workingctx
+        rev1 = c1.parents()[0].rev()
+    if rev2 is None: # c2 is a workingctx
+        rev2 = c2.parents()[0].rev()
+    pr = repo.changelog.parentrevs
+    def parents(rev):
+        return [p for p in pr(rev) if p != nullrev]
+    limit = min(ancestor.symmetricdifference(rev1, rev2, parents))
+    m1 = c1.manifest()
+    m2 = c2.manifest()
+    ma = ca.manifest()
+
+    def makectx(f, n):
+        if len(n) != 20: # in a working context?
+            if c1.rev() is None:
+                return c1.filectx(f)
+            return c2.filectx(f)
+        return repo.filectx(f, fileid=n)
+    ctx = util.cachefunc(makectx)
+
+    copy = {}
+    fullcopy = {}
+    diverge = {}
+
+    def checkcopies(f, m1, m2):
+        '''check possible copies of f from m1 to m2'''
+        c1 = ctx(f, m1[f])
+        for of in _findoldnames(c1, limit):
+            fullcopy[f] = of # remember for dir rename detection
+            if of in m2: # original file not in other manifest?
+                # if the original file is unchanged on the other branch,
+                # no merge needed
+                if m2[of] != ma.get(of):
+                    c2 = ctx(of, m2[of])
+                    ca = c1.ancestor(c2)
+                    # related and named changed on only one side?
+                    if ca and ca.path() == f or ca.path() == c2.path():
+                        if c1 != ca or c2 != ca: # merge needed?
+                            copy[f] = of
+            elif of in ma:
+                diverge.setdefault(of, []).append(f)
+
+    if not repo.ui.configbool("merge", "followcopies", True):
+        return {}, {}
+
+    repo.ui.debug(_("  searching for copies back to rev %d\n") % limit)
+
+    u1 = _nonoverlap(m1, m2, ma)
+    u2 = _nonoverlap(m2, m1, ma)
+
+    if u1:
+        repo.ui.debug(_("  unmatched files in local:\n   %s\n")
+                      % "\n   ".join(u1))
+    if u2:
+        repo.ui.debug(_("  unmatched files in other:\n   %s\n")
+                      % "\n   ".join(u2))
+
+    for f in u1:
+        checkcopies(f, m1, m2)
+    for f in u2:
+        checkcopies(f, m2, m1)
+
+    diverge2 = {}
+    for of, fl in diverge.items():
+        if len(fl) == 1:
+            del diverge[of] # not actually divergent
+        else:
+            diverge2.update(dict.fromkeys(fl)) # reverse map for below
+
+    if fullcopy:
+        repo.ui.debug(_("  all copies found (* = to merge, ! = divergent):\n"))
+        for f in fullcopy:
+            note = ""
+            if f in copy: note += "*"
+            if f in diverge2: note += "!"
+            repo.ui.debug(_("   %s -> %s %s\n") % (f, fullcopy[f], note))
+    del diverge2
+
+    if not fullcopy or not repo.ui.configbool("merge", "followdirs", True):
+        return copy, diverge
+
+    repo.ui.debug(_("  checking for directory renames\n"))
+
+    # generate a directory move map
+    d1, d2 = _dirs(m1), _dirs(m2)
+    invalid = {}
+    dirmove = {}
+
+    # examine each file copy for a potential directory move, which is
+    # when all the files in a directory are moved to a new directory
+    for dst, src in fullcopy.items():
+        dsrc, ddst = _dirname(src), _dirname(dst)
+        if dsrc in invalid:
+            # already seen to be uninteresting
+            continue
+        elif dsrc in d1 and ddst in d1:
+            # directory wasn't entirely moved locally
+            invalid[dsrc] = True
+        elif dsrc in d2 and ddst in d2:
+            # directory wasn't entirely moved remotely
+            invalid[dsrc] = True
+        elif dsrc in dirmove and dirmove[dsrc] != ddst:
+            # files from the same directory moved to two different places
+            invalid[dsrc] = True
+        else:
+            # looks good so far
+            dirmove[dsrc + "/"] = ddst + "/"
+
+    for i in invalid:
+        if i in dirmove:
+            del dirmove[i]
+    del d1, d2, invalid
+
+    if not dirmove:
+        return copy, diverge
+
+    for d in dirmove:
+        repo.ui.debug(_("  dir %s -> %s\n") % (d, dirmove[d]))
+
+    # check unaccounted nonoverlapping files against directory moves
+    for f in u1 + u2:
+        if f not in fullcopy:
+            for d in dirmove:
+                if f.startswith(d):
+                    # new file added in a directory that was moved, move it
+                    copy[f] = dirmove[d] + f[len(d):]
+                    repo.ui.debug(_("  file %s -> %s\n") % (f, copy[f]))
+                    break
+
+    return copy, diverge
--- a/mercurial/merge.py	Sat Mar 15 10:02:31 2008 -0500
+++ b/mercurial/merge.py	Sat Mar 15 10:02:31 2008 -0500
@@ -7,7 +7,7 @@
 
 from node import nullid, nullrev
 from i18n import _
-import errno, util, os, heapq, filemerge, ancestor
+import errno, util, os, filemerge, copies
 
 def _checkunknown(wctx, mctx):
     "check for collisions between unknown files and files in mctx"
@@ -54,180 +54,6 @@
 
     return action
 
-def _nonoverlap(d1, d2, d3):
-    "Return list of elements in d1 not in d2 or d3"
-    l = [d for d in d1 if d not in d3 and d not in d2]
-    l.sort()
-    return l
-
-def _dirname(f):
-    s = f.rfind("/")
-    if s == -1:
-        return ""
-    return f[:s]
-
-def _dirs(files):
-    d = {}
-    for f in files:
-        f = _dirname(f)
-        while f not in d:
-            d[f] = True
-            f = _dirname(f)
-    return d
-
-def _findoldnames(fctx, limit):
-    "find files that path was copied from, back to linkrev limit"
-    old = {}
-    seen = {}
-    orig = fctx.path()
-    visit = [fctx]
-    while visit:
-        fc = visit.pop()
-        s = str(fc)
-        if s in seen:
-            continue
-        seen[s] = 1
-        if fc.path() != orig and fc.path() not in old:
-            old[fc.path()] = 1
-        if fc.rev() < limit:
-            continue
-        visit += fc.parents()
-
-    old = old.keys()
-    old.sort()
-    return old
-
-def findcopies(repo, m1, m2, ma, limit):
-    """
-    Find moves and copies between m1 and m2 back to limit linkrev
-    """
-
-    wctx = repo.workingctx()
-
-    def makectx(f, n):
-        if len(n) == 20:
-            return repo.filectx(f, fileid=n)
-        return wctx.filectx(f)
-    ctx = util.cachefunc(makectx)
-
-    copy = {}
-    fullcopy = {}
-    diverge = {}
-
-    def checkcopies(f, m1, m2):
-        '''check possible copies of f from m1 to m2'''
-        c1 = ctx(f, m1[f])
-        for of in _findoldnames(c1, limit):
-            fullcopy[f] = of # remember for dir rename detection
-            if of in m2: # original file not in other manifest?
-                # if the original file is unchanged on the other branch,
-                # no merge needed
-                if m2[of] != ma.get(of):
-                    c2 = ctx(of, m2[of])
-                    ca = c1.ancestor(c2)
-                    # related and named changed on only one side?
-                    if ca and ca.path() == f or ca.path() == c2.path():
-                        if c1 != ca or c2 != ca: # merge needed?
-                            copy[f] = of
-            elif of in ma:
-                diverge.setdefault(of, []).append(f)
-
-    if not repo.ui.configbool("merge", "followcopies", True):
-        return {}, {}
-
-    # avoid silly behavior for update from empty dir
-    if not m1 or not m2 or not ma:
-        return {}, {}
-
-    repo.ui.debug(_("  searching for copies back to rev %d\n") % limit)
-
-    u1 = _nonoverlap(m1, m2, ma)
-    u2 = _nonoverlap(m2, m1, ma)
-
-    if u1:
-        repo.ui.debug(_("  unmatched files in local:\n   %s\n")
-                      % "\n   ".join(u1))
-    if u2:
-        repo.ui.debug(_("  unmatched files in other:\n   %s\n")
-                      % "\n   ".join(u2))
-
-    for f in u1:
-        checkcopies(f, m1, m2)
-
-    for f in u2:
-        checkcopies(f, m2, m1)
-
-    diverge2 = {}
-    for of, fl in diverge.items():
-        if len(fl) == 1:
-            del diverge[of] # not actually divergent
-        else:
-            diverge2.update(dict.fromkeys(fl)) # reverse map for below
-
-    if fullcopy:
-        repo.ui.debug(_("  all copies found (* = to merge, ! = divergent):\n"))
-        for f in fullcopy:
-            note = ""
-            if f in copy: note += "*"
-            if f in diverge2: note += "!"
-            repo.ui.debug(_("   %s -> %s %s\n") % (f, fullcopy[f], note))
-
-    del diverge2
-
-    if not fullcopy or not repo.ui.configbool("merge", "followdirs", True):
-        return copy, diverge
-
-    repo.ui.debug(_("  checking for directory renames\n"))
-
-    # generate a directory move map
-    d1, d2 = _dirs(m1), _dirs(m2)
-    invalid = {}
-    dirmove = {}
-
-    # examine each file copy for a potential directory move, which is
-    # when all the files in a directory are moved to a new directory
-    for dst, src in fullcopy.items():
-        dsrc, ddst = _dirname(src), _dirname(dst)
-        if dsrc in invalid:
-            # already seen to be uninteresting
-            continue
-        elif dsrc in d1 and ddst in d1:
-            # directory wasn't entirely moved locally
-            invalid[dsrc] = True
-        elif dsrc in d2 and ddst in d2:
-            # directory wasn't entirely moved remotely
-            invalid[dsrc] = True
-        elif dsrc in dirmove and dirmove[dsrc] != ddst:
-            # files from the same directory moved to two different places
-            invalid[dsrc] = True
-        else:
-            # looks good so far
-            dirmove[dsrc + "/"] = ddst + "/"
-
-    for i in invalid:
-        if i in dirmove:
-            del dirmove[i]
-
-    del d1, d2, invalid
-
-    if not dirmove:
-        return copy, diverge
-
-    for d in dirmove:
-        repo.ui.debug(_("  dir %s -> %s\n") % (d, dirmove[d]))
-
-    # check unaccounted nonoverlapping files against directory moves
-    for f in u1 + u2:
-        if f not in fullcopy:
-            for d in dirmove:
-                if f.startswith(d):
-                    # new file added in a directory that was moved, move it
-                    copy[f] = dirmove[d] + f[len(d):]
-                    repo.ui.debug(_("  file %s -> %s\n") % (f, copy[f]))
-                    break
-
-    return copy, diverge
-
 def manifestmerge(repo, p1, p2, pa, overwrite, partial):
     """
     Merge p1 and p2 with ancestor ma and generate merge action list
@@ -245,8 +71,7 @@
     ma = pa.manifest()
     backwards = (pa == p2)
     action = []
-    copy = {}
-    diverge = {}
+    copy, copied, diverge = {}, {}, {}
 
     def fmerge(f, f2=None, fa=None):
         """merge flags"""
@@ -276,20 +101,10 @@
         action.append((f, m) + args)
 
     if not (backwards or overwrite):
-        rev1 = p1.rev()
-        if rev1 is None:
-            # p1 is a workingctx
-            rev1 = p1.parents()[0].rev()
-        pr = repo.changelog.parentrevs
-        def parents(rev):
-            return [p for p in pr(rev) if p != nullrev]
-        limit = min(ancestor.symmetricdifference(rev1, p2.rev(), parents))
-        copy, diverge = findcopies(repo, m1, m2, ma, limit)
-
-    for of, fl in diverge.items():
-        act("divergent renames", "dr", of, fl)
-
-    copied = dict.fromkeys(copy.values())
+        copy, diverge = copies.copies(repo, p1, p2, pa)
+        copied = dict.fromkeys(copy.values())
+        for of, fl in diverge.items():
+            act("divergent renames", "dr", of, fl)
 
     # Compare manifests
     for f, n in m1.iteritems():