copies: rewrite copy detection for non-merge users
The existing copy detection API was designed with merge in mind and
was ill-suited to status/diff. The new pathcopies
implementation gives more accurate, easier-to-use results when
comparing two revisions, and is much simpler to understand.
Test notes:
- test-mv-cp-st-diff.t now finds more renames in the reverse direction
- test-mq-merge.t was always wrong: it duplicated a copy in the diff that
was already present in one of the parent revisions
--- a/mercurial/commands.py Wed Jan 04 15:48:02 2012 -0600
+++ b/mercurial/commands.py Wed Jan 04 17:55:30 2012 -0600
@@ -5206,17 +5206,7 @@
changestates = zip(states, 'MAR!?IC', stat)
if (opts.get('all') or opts.get('copies')) and not opts.get('no_status'):
- ctx1 = repo[node1]
- ctx2 = repo[node2]
- added = stat[1]
- if node2 is None:
- added = stat[0] + stat[1] # merged?
-
- for k, v in copies.pathcopies(ctx1, ctx2).iteritems():
- if k in added:
- copy[k] = v
- elif v in added:
- copy[v] = k
+ copy = copies.pathcopies(repo[node1], repo[node2])
for state, char, files in changestates:
if state in show:
--- a/mercurial/copies.py Wed Jan 04 15:48:02 2012 -0600
+++ b/mercurial/copies.py Wed Jan 04 17:55:30 2012 -0600
@@ -84,8 +84,89 @@
return None
return limit
-def pathcopies(c1, c2):
- return mergecopies(c1._repo, c1, c2, c1._repo["null"], False)[0]
+def _chain(src, dst, a, b):
+ '''chain two sets of copies a->b'''
+ t = a.copy()
+ for k, v in b.iteritems():
+ if v in t:
+ # found a chain
+ if t[v] != k:
+ # file wasn't renamed back to itself
+ t[k] = t[v]
+ if v not in dst:
+ # chain was a rename, not a copy
+ del t[v]
+ if v in src:
+ # file is a copy of an existing file
+ t[k] = v
+ return t
+
+def _tracefile(fctx, actx):
+ '''return file context that is the ancestor of fctx present in actx'''
+ stop = actx.rev()
+ am = actx.manifest()
+
+ for f in fctx.ancestors():
+ if am.get(f.path(), None) == f.filenode():
+ return f
+ if f.rev() < stop:
+ return None
+
+def _dirstatecopies(d):
+ ds = d._repo.dirstate
+ c = ds.copies().copy()
+ for k in c.keys():
+ if ds[k] not in 'anm':
+ del c[k]
+ return c
+
+def _forwardcopies(a, b):
+ '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
+
+ # check for working copy
+ w = None
+ if b.rev() is None:
+ w = b
+ b = w.p1()
+ if a == b:
+ # short-circuit to avoid issues with merge states
+ return _dirstatecopies(w)
+
+ # find where new files came from
+ # we currently don't try to find where old files went, too expensive
+ # this means we can miss a case like 'hg rm b; hg cp a b'
+ cm = {}
+ for f in b:
+ if f not in a:
+ ofctx = _tracefile(b[f], a)
+ if ofctx:
+ cm[f] = ofctx.path()
+
+ # combine copies from dirstate if necessary
+ if w is not None:
+ cm = _chain(a, w, cm, _dirstatecopies(w))
+
+ return cm
+
+def _backwardcopies(a, b):
+ # because the forward mapping is 1:n, we can lose renames here
+ # in particular, we find renames better than copies
+ f = _forwardcopies(b, a)
+ r = {}
+ for k, v in f.iteritems():
+ r[v] = k
+ return r
+
+def pathcopies(x, y):
+ '''find {dst@y: src@x} copy mapping for directed compare'''
+ if x == y or not x or not y:
+ return {}
+ a = y.ancestor(x)
+ if a == x:
+ return _forwardcopies(x, y)
+ if a == y:
+ return _backwardcopies(x, y)
+ return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
def mergecopies(repo, c1, c2, ca, checkdirs=True):
"""
--- a/tests/test-mq-merge.t Wed Jan 04 15:48:02 2012 -0600
+++ b/tests/test-mq-merge.t Wed Jan 04 17:55:30 2012 -0600
@@ -149,13 +149,11 @@
-b
+a
+c
- diff --git a/a b/aa
- copy from a
- copy to aa
- --- a/a
+ diff --git a/aa b/aa
+ new file mode 100644
+ --- /dev/null
+++ b/aa
- @@ -1,1 +1,1 @@
- -b
+ @@ -0,0 +1,1 @@
+a
Check patcha2 is still a regular patch:
--- a/tests/test-mv-cp-st-diff.t Wed Jan 04 15:48:02 2012 -0600
+++ b/tests/test-mv-cp-st-diff.t Wed Jan 04 17:55:30 2012 -0600
@@ -560,6 +560,7 @@
- parent to root: --rev . --rev 0
M a
+ b
R b
diff --git a/a b/a
@@ -611,6 +612,7 @@
- parent to branch: --rev . --rev 2
M a
+ b
A x/y
R b
@@ -906,6 +908,7 @@
- parent to root: --rev . --rev 0
M a
+ b
R b
R c
@@ -975,6 +978,7 @@
- parent to branch: --rev . --rev 2
M a
+ b
A x/y
R b
R c