comparison mercurial/copies.py @ 15775:91eb4512edd0

copies: rewrite copy detection for non-merge users. The existing copy detection API was designed with merge in mind and was ill-suited for doing status/diff. The new pathcopies implementation gives more accurate, easier to use results for comparing two revisions, and is much simpler to understand. Test notes: - test-mv-cp-st.t now finds more renames in the reverse direction - test-mq-merge.t was always wrong and duplicated a copy in diff that was already present in one of the parent revisions
author Matt Mackall <mpm@selenic.com>
date Wed, 04 Jan 2012 17:55:30 -0600
parents 0bd17a4bed88
children d1c74c6151c9
comparison
equal deleted inserted replaced
15774:0bd17a4bed88 15775:91eb4512edd0
82 82
83 if not hascommonancestor: 83 if not hascommonancestor:
84 return None 84 return None
85 return limit 85 return limit
86 86
87 def pathcopies(c1, c2): 87 def _chain(src, dst, a, b):
88 return mergecopies(c1._repo, c1, c2, c1._repo["null"], False)[0] 88 '''chain two sets of copies a->b'''
89 t = a.copy()
90 for k, v in b.iteritems():
91 if v in t:
92 # found a chain
93 if t[v] != k:
94 # file wasn't renamed back to itself
95 t[k] = t[v]
96 if v not in dst:
97 # chain was a rename, not a copy
98 del t[v]
99 if v in src:
100 # file is a copy of an existing file
101 t[k] = v
102 return t
103
104 def _tracefile(fctx, actx):
105 '''return file context that is the ancestor of fctx present in actx'''
106 stop = actx.rev()
107 am = actx.manifest()
108
109 for f in fctx.ancestors():
110 if am.get(f.path(), None) == f.filenode():
111 return f
112 if f.rev() < stop:
113 return None
114
115 def _dirstatecopies(d):
116 ds = d._repo.dirstate
117 c = ds.copies().copy()
118 for k in c.keys():
119 if ds[k] not in 'anm':
120 del c[k]
121 return c
122
def _forwardcopies(a, b):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    When b is a working copy (its rev() is None) the history walk is
    done from its first parent and the dirstate copies are chained on
    top of the result.
    '''

    # check for working copy
    wctx = None
    if b.rev() is None:
        wctx, b = b, b.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}
    for name in b:
        if name in a:
            continue
        origin = _tracefile(b[name], a)
        if origin:
            copies[name] = origin.path()

    # combine copies from dirstate if necessary
    if wctx is None:
        return copies
    return _chain(a, wctx, copies, _dirstatecopies(wctx))
150
def _backwardcopies(a, b):
    '''invert the mapping found by _forwardcopies(b, a)

    The forward {destination: source} dict is flipped into
    {source: destination}.
    '''
    # because the forward mapping is 1:n, we can lose renames here
    # in particular, we find renames better than copies
    forward = _forwardcopies(b, a)
    # when several destinations share a source, the last one visited wins
    return dict((forward[dst], dst) for dst in forward)
159
def pathcopies(x, y):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x and y are equal or either is falsy.
    Otherwise the result depends on y.ancestor(x): a forward lookup
    when it is x, a backward lookup when it is y, and a chain of
    backward-then-forward lookups through it in every other case.
    '''
    if x == y:
        return {}
    if not x or not y:
        return {}

    base = y.ancestor(x)
    if base == x:
        return _forwardcopies(x, y)
    if base == y:
        return _backwardcopies(x, y)

    # neither side is the ancestor: go back from x to base, then
    # forward from base to y
    backward = _backwardcopies(x, base)
    forward = _forwardcopies(base, y)
    return _chain(x, y, backward, forward)
89 170
90 def mergecopies(repo, c1, c2, ca, checkdirs=True): 171 def mergecopies(repo, c1, c2, ca, checkdirs=True):
91 """ 172 """
92 Find moves and copies between context c1 and c2 173 Find moves and copies between context c1 and c2
93 """ 174 """