comparison mercurial/copies.py @ 41756:49ad315b39ee

copies: do copy tracing based on ctx.p[12]copies() if configured This adds an option to do copy tracing in a changeset-optimized way. If the metadata is stored in filelogs, this is obviously going to be suboptimal. The point is that it provides a way of transitioning to changeset-stored metadata. Some of the tests behave a little differently, but they all seem resonable to me. The config option may very well be renamed later when it's clearer what options we want and how they will behave. When the test suite is run with --extra-config-opt to use the new copy tracing, all tests pass, besides test-copies.t (which fails in the same way as you can see in this patch). `hg debugpathcopies 4.0 4.8` reports 82 copies. With this option enabled, the only difference is this: -mercurial/pure/bdiff.py -> mercurial/cffi/bdiff.py +setup_bdiff_cffi.py -> mercurial/cffi/bdiff.py I believe that happened because it was renamed in different ways on different sides of a merge and the new algorithm arbitrarily prefers copies that happened on p1. The runtime is about 0.85 seconds with the old copy tracing and 5.7 seconds with the new copy tracing. That's kind of slow, but actually better than I had expected. Differential Revision: https://phab.mercurial-scm.org/D5991
author Martin von Zweigbergk <martinvonz@google.com>
date Tue, 19 Feb 2019 15:42:45 -0800
parents d5edb5d3a337
children 7694b685bb10
comparison
equal deleted inserted replaced
41755:a4358f7345b4 41756:49ad315b39ee
164 def _committedforwardcopies(a, b, match): 164 def _committedforwardcopies(a, b, match):
165 """Like _forwardcopies(), but b.rev() cannot be None (working copy)""" 165 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
166 # files might have to be traced back to the fctx parent of the last 166 # files might have to be traced back to the fctx parent of the last
167 # one-side-only changeset, but not further back than that 167 # one-side-only changeset, but not further back than that
168 repo = a._repo 168 repo = a._repo
169
170 if repo.ui.config('experimental', 'copies.read-from') == 'compatibility':
171 return _changesetforwardcopies(a, b, match)
172
169 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies') 173 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
170 dbg = repo.ui.debug 174 dbg = repo.ui.debug
171 if debug: 175 if debug:
172 dbg('debug.copies: looking into rename from %s to %s\n' 176 dbg('debug.copies: looking into rename from %s to %s\n'
173 % (a, b)) 177 % (a, b))
213 cm[f] = ofctx.path() 217 cm[f] = ofctx.path()
214 if debug: 218 if debug:
215 dbg('debug.copies: time: %f seconds\n' 219 dbg('debug.copies: time: %f seconds\n'
216 % (util.timer() - start)) 220 % (util.timer() - start))
217 return cm 221 return cm
222
223 def _changesetforwardcopies(a, b, match):
224 if a.rev() == node.nullrev:
225 return {}
226
227 repo = a.repo()
228 children = {}
229 cl = repo.changelog
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 for r in missingrevs:
232 for p in cl.parentrevs(r):
233 if p == node.nullrev:
234 continue
235 if p not in children:
236 children[p] = [r]
237 else:
238 children[p].append(r)
239
240 roots = set(children) - set(missingrevs)
241 # 'work' contains 3-tuples of a (revision number, parent number, copies).
242 # The parent number is only used for knowing which parent the copies dict
243 # came from.
244 work = [(r, 1, {}) for r in roots]
245 heapq.heapify(work)
246 while work:
247 r, i1, copies1 = heapq.heappop(work)
248 if work and work[0][0] == r:
249 # We are tracing copies from both parents
250 r, i2, copies2 = heapq.heappop(work)
251 copies = {}
252 ctx = repo[r]
253 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
254 allcopies = set(copies1) | set(copies2)
255 # TODO: perhaps this filtering should be done as long as ctx
256 # is merge, whether or not we're tracing from both parent.
257 for dst in allcopies:
258 if not match(dst):
259 continue
260 if dst not in copies2:
261 # Copied on p1 side: mark as copy from p1 side if it didn't
262 # already exist on p2 side
263 if dst not in p2man:
264 copies[dst] = copies1[dst]
265 elif dst not in copies1:
266 # Copied on p2 side: mark as copy from p2 side if it didn't
267 # already exist on p1 side
268 if dst not in p1man:
269 copies[dst] = copies2[dst]
270 else:
271 # Copied on both sides: mark as copy from p1 side
272 copies[dst] = copies1[dst]
273 else:
274 copies = copies1
275 if r == b.rev():
276 return copies
277 for c in children[r]:
278 childctx = repo[c]
279 if r == childctx.p1().rev():
280 parent = 1
281 childcopies = childctx.p1copies()
282 else:
283 assert r == childctx.p2().rev()
284 parent = 2
285 childcopies = childctx.p2copies()
286 if not match.always():
287 childcopies = {dst: src for dst, src in childcopies.items()
288 if match(dst)}
289 childcopies = _chain(a, childctx, copies, childcopies)
290 heapq.heappush(work, (c, parent, childcopies))
291 assert False
218 292
219 def _forwardcopies(a, b, match=None): 293 def _forwardcopies(a, b, match=None):
220 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" 294 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
221 295
222 match = a.repo().narrowmatch(match) 296 match = a.repo().narrowmatch(match)