--- a/mercurial/copies.py Fri Apr 24 15:06:42 2020 -0400
+++ b/mercurial/copies.py Thu Mar 05 17:55:05 2020 +0100
@@ -183,10 +183,27 @@
* p1copies: mapping of copies from p1
* p2copies: mapping of copies from p2
* removed: a list of removed files
+ * ismerged: a callback to know if file was merged in that revision
"""
cl = repo.changelog
parents = cl.parentrevs
+ def get_ismerged(rev):
+ ctx = repo[rev]
+
+ def ismerged(path):
+ if path not in ctx.files():
+ return False
+ fctx = ctx[path]
+ parents = fctx._filelog.parents(fctx._filenode)
+ nb_parents = 0
+ for n in parents:
+ if n != node.nullid:
+ nb_parents += 1
+ return nb_parents >= 2
+
+ return ismerged
+
if repo.filecopiesmode == b'changeset-sidedata':
changelogrevision = cl.changelogrevision
flags = cl.flags
@@ -218,6 +235,7 @@
def revinfo(rev):
p1, p2 = parents(rev)
+ value = None
if flags(rev) & REVIDX_SIDEDATA:
e = merge_caches.pop(rev, None)
if e is not None:
@@ -228,12 +246,22 @@
removed = c.filesremoved
if p1 != node.nullrev and p2 != node.nullrev:
# XXX some case we over cache, IGNORE
- merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
+ value = merge_caches[rev] = (
+ p1,
+ p2,
+ p1copies,
+ p2copies,
+ removed,
+ get_ismerged(rev),
+ )
else:
p1copies = {}
p2copies = {}
removed = []
- return p1, p2, p1copies, p2copies, removed
+
+ if value is None:
+ value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
+ return value
else:
@@ -242,7 +270,7 @@
ctx = repo[rev]
p1copies, p2copies = ctx._copies
removed = ctx.filesremoved()
- return p1, p2, p1copies, p2copies, removed
+ return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
return revinfo
@@ -256,6 +284,7 @@
revinfo = _revinfogetter(repo)
cl = repo.changelog
+ isancestor = cl.isancestorrev # XXX we should had chaching to this.
missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
mrset = set(missingrevs)
roots = set()
@@ -283,10 +312,14 @@
iterrevs.update(roots)
iterrevs.remove(b.rev())
revs = sorted(iterrevs)
- return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)
+ return _combinechangesetcopies(
+ revs, children, b.rev(), revinfo, match, isancestor
+ )
-def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
+def _combinechangesetcopies(
+ revs, children, targetrev, revinfo, match, isancestor
+):
"""combine the copies information for each item of iterrevs
revs: sorted iterable of revision to visit
@@ -305,7 +338,7 @@
# this is a root
copies = {}
for i, c in enumerate(children[r]):
- p1, p2, p1copies, p2copies, removed = revinfo(c)
+ p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
if r == p1:
parent = 1
childcopies = p1copies
@@ -319,9 +352,12 @@
}
newcopies = copies
if childcopies:
- newcopies = _chain(newcopies, childcopies)
- # _chain makes a copies, we can avoid doing so in some
- # simple/linear cases.
+ newcopies = copies.copy()
+ for dest, source in pycompat.iteritems(childcopies):
+ prev = copies.get(source)
+ if prev is not None and prev[1] is not None:
+ source = prev[1]
+ newcopies[dest] = (c, source)
assert newcopies is not copies
for f in removed:
if f in newcopies:
@@ -330,7 +366,7 @@
# branches. when there are no other branches, this
# could be avoided.
newcopies = copies.copy()
- del newcopies[f]
+ newcopies[f] = (c, None)
othercopies = all_copies.get(c)
if othercopies is None:
all_copies[c] = newcopies
@@ -338,21 +374,55 @@
# we are the second parent to work on c, we need to merge our
# work with the other.
#
- # Unlike when copies are stored in the filelog, we consider
- # it a copy even if the destination already existed on the
- # other branch. It's simply too expensive to check if the
- # file existed in the manifest.
- #
# In case of conflict, parent 1 take precedence over parent 2.
# This is an arbitrary choice made anew when implementing
# changeset based copies. It was made without regards with
# potential filelog related behavior.
if parent == 1:
- othercopies.update(newcopies)
+ _merge_copies_dict(
+ othercopies, newcopies, isancestor, ismerged
+ )
else:
- newcopies.update(othercopies)
+ _merge_copies_dict(
+ newcopies, othercopies, isancestor, ismerged
+ )
all_copies[c] = newcopies
- return all_copies[targetrev]
+
+ final_copies = {}
+ for dest, (tt, source) in all_copies[targetrev].items():
+ if source is not None:
+ final_copies[dest] = source
+ return final_copies
+
+
+def _merge_copies_dict(minor, major, isancestor, ismerged):
+ """merge two copies-mapping together, minor and major
+
+ In case of conflict, value from "major" will be picked.
+
+ - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
+ ancestors of `high_rev`,
+
+ - `ismerged(path)`: callable return True if `path` have been merged in the
+ current revision,
+ """
+ for dest, value in major.items():
+ other = minor.get(dest)
+ if other is None:
+ minor[dest] = value
+ else:
+ new_tt = value[0]
+ other_tt = other[0]
+ if value[1] == other[1]:
+ continue
+ # content from "major" wins, unless it is older
+ # than the branch point or there is a merge
+ if (
+ new_tt == other_tt
+ or not isancestor(new_tt, other_tt)
+ or ismerged(dest)
+ ):
+ minor[dest] = value
def _forwardcopies(a, b, base=None, match=None):
--- a/tests/test-copies-chain-merge.t Fri Apr 24 15:06:42 2020 -0400
+++ b/tests/test-copies-chain-merge.t Thu Mar 05 17:55:05 2020 +0100
@@ -1,3 +1,5 @@
+#testcases filelog compatibility sidedata
+
=====================================================
Test Copy tracing for chain of copies involving merge
=====================================================
@@ -6,6 +8,7 @@
are involved. It cheks we do not have unwanted update of behavior and that the
different options to retrieve copies behave correctly.
+
Setup
=====
@@ -18,6 +21,22 @@
> logtemplate={rev} {desc}\n
> EOF
+#if compatibility
+ $ cat >> $HGRCPATH << EOF
+ > [experimental]
+ > copies.read-from = compatibility
+ > EOF
+#endif
+
+#if sidedata
+ $ cat >> $HGRCPATH << EOF
+ > [format]
+ > exp-use-side-data = yes
+ > exp-use-copies-side-data-changeset = yes
+ > EOF
+#endif
+
+
$ hg init repo-chain
$ cd repo-chain
@@ -453,17 +472,26 @@
0 4 0dd616bc7ab1 000000000000 000000000000
1 10 6da5a2eecb9c 000000000000 000000000000
2 19 eb806e34ef6b 0dd616bc7ab1 6da5a2eecb9c
+
+# Here the filelog based implementation is not looking at the rename
+# information (because the file exist on both side). However the changelog
+# based on works fine. We have different output.
+
$ hg status --copies --rev 'desc("a-2")' --rev 'desc("mAEm-0")'
M f
+ b (no-filelog !)
R b
$ hg status --copies --rev 'desc("a-2")' --rev 'desc("mEAm-0")'
M f
+ b (no-filelog !)
R b
$ hg status --copies --rev 'desc("e-2")' --rev 'desc("mAEm-0")'
M f
+ d (no-filelog !)
R d
$ hg status --copies --rev 'desc("e-2")' --rev 'desc("mEAm-0")'
M f
+ d (no-filelog !)
R d
$ hg status --copies --rev 'desc("i-2")' --rev 'desc("a-2")'
A f
@@ -473,6 +501,18 @@
A f
b
R b
+
+# From here, we run status against revision where both source file exists.
+#
+# The filelog based implementation picks an arbitrary side based on revision
+# numbers. So the same side "wins" whatever the parents order is. This is
+# sub-optimal because depending on revision numbers means the result can be
+# different from one repository to the next.
+#
+# The changeset based algorithm use the parent order to break tie on conflicting
+# information and will have a different order depending on who is p1 and p2.
+# That order is stable accross repositories. (data from p1 prevails)
+
$ hg status --copies --rev 'desc("i-2")' --rev 'desc("mAEm-0")'
A f
d
@@ -480,7 +520,8 @@
R d
$ hg status --copies --rev 'desc("i-2")' --rev 'desc("mEAm-0")'
A f
- d
+ d (filelog !)
+ b (no-filelog !)
R b
R d
$ hg status --copies --rev 'desc("i-0")' --rev 'desc("mAEm-0")'
@@ -490,7 +531,8 @@
R b
$ hg status --copies --rev 'desc("i-0")' --rev 'desc("mEAm-0")'
A f
- a
+ a (filelog !)
+ b (no-filelog !)
R a
R b
@@ -563,21 +605,25 @@
R h
$ hg status --copies --rev 'desc("b-1")' --rev 'desc("mBFm-0")'
M d
+ h (no-filelog !)
R h
$ hg status --copies --rev 'desc("f-2")' --rev 'desc("mBFm-0")'
M b
$ hg status --copies --rev 'desc("f-1")' --rev 'desc("mBFm-0")'
M b
M d
+ i (no-filelog !)
R i
$ hg status --copies --rev 'desc("b-1")' --rev 'desc("mFBm-0")'
M d
+ h (no-filelog !)
R h
$ hg status --copies --rev 'desc("f-2")' --rev 'desc("mFBm-0")'
M b
$ hg status --copies --rev 'desc("f-1")' --rev 'desc("mFBm-0")'
M b
M d
+ i (no-filelog !)
R i
The following graphlog is wrong, the "a -> c -> d" chain was overwritten and should not appear.
@@ -645,9 +691,15 @@
|
o 0 i-0 initial commit: a b h
+One side of the merge have a long history with rename. The other side of the
+merge point to a new file with a smaller history. Each side is "valid".
+
+(and again the filelog based algorithm only explore one, with a pick based on
+revision numbers)
+
$ hg status --copies --rev 'desc("i-0")' --rev 'desc("mDGm-0")'
A d
- a
+ a (filelog !)
R a
$ hg status --copies --rev 'desc("i-0")' --rev 'desc("mGDm-0")'
A d
@@ -740,7 +792,8 @@
$ hg status --copies --rev 'desc("i-0")' --rev 'desc("mFGm-0")'
A d
- a
+ h (no-filelog !)
+ a (filelog !)
R a
R h
$ hg status --copies --rev 'desc("i-0")' --rev 'desc("mGFm-0")'
@@ -754,15 +807,19 @@
M d
$ hg status --copies --rev 'desc("f-1")' --rev 'desc("mFGm-0")'
M d
+ i (no-filelog !)
R i
$ hg status --copies --rev 'desc("f-1")' --rev 'desc("mGFm-0")'
M d
+ i (no-filelog !)
R i
$ hg status --copies --rev 'desc("g-1")' --rev 'desc("mFGm-0")'
M d
+ h (no-filelog !)
R h
$ hg status --copies --rev 'desc("g-1")' --rev 'desc("mGFm-0")'
M d
+ h (no-filelog !)
R h
$ hg log -Gfr 'desc("mFGm-0")' d