copies: do copy tracing based on ctx.p[12]copies() if configured
This adds an option to do copy tracing in a changeset-optimized
way. If the metadata is stored in filelogs, this is obviously going to
be suboptimal. The point is that it provides a way of transitioning to
changeset-stored metadata.
Some of the tests behave a little differently, but they all seem
reasonable to me.
The config option may very well be renamed later when it's clearer
what options we want and how they will behave.
When the test suite is run with --extra-config-opt to use the new copy
tracing, all tests pass, besides test-copies.t (which fails in the
same way as you can see in this patch).
`hg debugpathcopies 4.0 4.8` reports 82 copies. With this option
enabled, the only difference is this:
-mercurial/pure/bdiff.py -> mercurial/cffi/bdiff.py
+setup_bdiff_cffi.py -> mercurial/cffi/bdiff.py
I believe that happened because it was renamed in different ways on
different sides of a merge and the new algorithm arbitrarily prefers
copies that happened on p1. The runtime is about 0.85 seconds with the
old copy tracing and 5.7 seconds with the new copy tracing. That's
kind of slow, but actually better than I had expected.
Differential Revision: https://phab.mercurial-scm.org/D5991
--- a/mercurial/configitems.py Fri Jan 18 13:13:30 2019 -0800
+++ b/mercurial/configitems.py Tue Feb 19 15:42:45 2019 -0800
@@ -482,6 +482,9 @@
coreconfigitem('experimental', 'copytrace.sourcecommitlimit',
default=100,
)
+coreconfigitem('experimental', 'copies.read-from',
+ default="filelog-only",  # copies.py only switches algorithms when this is 'compatibility'
+)
coreconfigitem('experimental', 'crecordtest',
default=None,
)
--- a/mercurial/copies.py Fri Jan 18 13:13:30 2019 -0800
+++ b/mercurial/copies.py Tue Feb 19 15:42:45 2019 -0800
@@ -166,6 +166,10 @@
# files might have to be traced back to the fctx parent of the last
# one-side-only changeset, but not further back than that
repo = a._repo
+
+ if repo.ui.config('experimental', 'copies.read-from') == 'compatibility':
+ return _changesetforwardcopies(a, b, match)
+
debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
dbg = repo.ui.debug
if debug:
@@ -216,6 +220,76 @@
% (util.timer() - start))
return cm
+def _changesetforwardcopies(a, b, match):  # trace copies from a to b via ctx.p1copies()/p2copies()
+ if a.rev() == node.nullrev:  # nothing to trace when starting from the null revision
+ return {}
+
+ repo = a.repo()
+ children = {}  # parent rev -> list of its child revs found in missingrevs
+ cl = repo.changelog
+ missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])  # presumably ancestors of b that are not ancestors of a -- confirm
+ for r in missingrevs:
+ for p in cl.parentrevs(r):
+ if p == node.nullrev:
+ continue
+ if p not in children:
+ children[p] = [r]
+ else:
+ children[p].append(r)
+
+ roots = set(children) - set(missingrevs)  # parents of missing revs that are not missing themselves
+ # 'work' contains 3-tuples of (revision number, parent number, copies).
+ # The parent number is only used for knowing which parent the copies dict
+ # came from.
+ work = [(r, 1, {}) for r in roots]
+ heapq.heapify(work)  # min-heap: entries pop in increasing revision number
+ while work:
+ r, i1, copies1 = heapq.heappop(work)  # ties on r are ordered by parent number, so a p1 entry pops first
+ if work and work[0][0] == r:
+ # We are tracing copies from both parents
+ r, i2, copies2 = heapq.heappop(work)
+ copies = {}
+ ctx = repo[r]
+ p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
+ allcopies = set(copies1) | set(copies2)
+ # TODO: perhaps this filtering should be done as long as ctx
+ # is a merge, whether or not we're tracing from both parents.
+ for dst in allcopies:
+ if not match(dst):
+ continue
+ if dst not in copies2:
+ # Copied on p1 side: mark as copy from p1 side if it didn't
+ # already exist on p2 side
+ if dst not in p2man:
+ copies[dst] = copies1[dst]
+ elif dst not in copies1:
+ # Copied on p2 side: mark as copy from p2 side if it didn't
+ # already exist on p1 side
+ if dst not in p1man:
+ copies[dst] = copies2[dst]
+ else:
+ # Copied on both sides: mark as copy from p1 side
+ copies[dst] = copies1[dst]
+ else:
+ copies = copies1  # only one queued entry for r: use its copies unfiltered
+ if r == b.rev():  # reached the target revision: its accumulated copies are the result
+ return copies
+ for c in children[r]:
+ childctx = repo[c]
+ if r == childctx.p1().rev():
+ parent = 1
+ childcopies = childctx.p1copies()
+ else:
+ assert r == childctx.p2().rev()
+ parent = 2
+ childcopies = childctx.p2copies()
+ if not match.always():
+ childcopies = {dst: src for dst, src in childcopies.items()
+ if match(dst)}
+ childcopies = _chain(a, childctx, copies, childcopies)
+ heapq.heappush(work, (c, parent, childcopies))
+ assert False  # presumably unreachable: the loop's only exit is the return at b.rev() above
+
def _forwardcopies(a, b, match=None):
"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""
--- a/tests/test-copies.t Fri Jan 18 13:13:30 2019 -0800
+++ b/tests/test-copies.t Tue Feb 19 15:42:45 2019 -0800
@@ -1,9 +1,17 @@
+#testcases filelog compatibility
$ cat >> $HGRCPATH << EOF
> [alias]
> l = log -G -T '{rev} {desc}\n{files}\n'
> EOF
+#if compatibility
+ $ cat >> $HGRCPATH << EOF
+ > [experimental]
+ > copies.read-from = compatibility
+ > EOF
+#endif
+
$ REPONUM=0
$ newrepo() {
> cd $TESTTMP
@@ -338,7 +346,7 @@
$ hg debugpathcopies 1 2
x -> z
$ hg debugpathcopies 0 2
- x -> z
+ x -> z (filelog !)
Copy file that exists on both sides of the merge, different content
$ newrepo
@@ -476,7 +484,8 @@
$ hg debugpathcopies 1 4
$ hg debugpathcopies 2 4
$ hg debugpathcopies 0 4
- x -> z
+ x -> z (filelog !)
+ y -> z (compatibility !)
$ hg debugpathcopies 1 5
$ hg debugpathcopies 2 5
$ hg debugpathcopies 0 5