comparison mercurial/copies.py @ 44758:45f3f35cefe7

copies: fix the changeset based algorithm regarding merge

In 99ebde4fec99, we changed the list of files stored in the `files` field. This caused the changeset-centric copy algorithm to break in various situations involving merges: older information could reach the merge through `p1`, and even though the information from `p2` was strictly fresher, it would get overwritten anyway.

We now record more details about which revision introduces rename information. This helps us make the right decision in case of a merge, and we run a more comprehensive test suite that includes this kind of situation.

The behavior differs slightly from the filelog-based algorithm in a couple of instances. There are essentially two distinct cases:

1) Conflicting rename information in a merge (a different rename history on each side). The filelog-based implementation arbitrarily picks a side based on the file revision number, so the result depends on a local factor. The changeset-centric algorithm uses a deterministic approach, picking the information coming from the first parent of the merge, which is stable across different clones.

2) Rename information for a file that exists in both the source and the destination. The filelog-based implementation does not even try to detect these, while the changeset-centric one gets them for "free" (it is simpler to detect them than not to).

The new implementation focuses on correctness. Performance improvements will come later.

Differential Revision: https://phab.mercurial-scm.org/D8244
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Thu, 05 Mar 2020 17:55:05 +0100
parents 30862e226339
children 61719b9658b1
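The first behavioral difference described in the message above (deterministic first-parent precedence for conflicting rename information) can be illustrated with a small standalone sketch. The function and data below are hypothetical simplifications for illustration only; they are not the Mercurial API.

def pick_copy_source(p1_copies, p2_copies, dest):
    """Copy source recorded for `dest`, preferring the first parent on conflict."""
    if dest in p1_copies:
        return p1_copies[dest]
    return p2_copies.get(dest)

# Each merge parent claims a different rename history for the same file.
p1_copies = {'renamed.txt': 'name-on-p1.txt'}
p2_copies = {'renamed.txt': 'name-on-p2.txt'}

# The changeset-centric algorithm always keeps the first-parent answer,
# independent of local file revision numbers, so the result is stable
# across different clones.
assert pick_copy_source(p1_copies, p2_copies, 'renamed.txt') == 'name-on-p1.txt'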
comparison
equal deleted inserted replaced
44757:f365dfede78f 44758:45f3f35cefe7
181 * p1: revision number of first parent 181 * p1: revision number of first parent
182 * p2: revision number of second parent 182 * p2: revision number of second parent
183 * p1copies: mapping of copies from p1 183 * p1copies: mapping of copies from p1
184 * p2copies: mapping of copies from p2 184 * p2copies: mapping of copies from p2
185 * removed: a list of removed files 185 * removed: a list of removed files
186 * ismerged: a callback to know whether a file was merged in that revision
186 """ 187 """
187 cl = repo.changelog 188 cl = repo.changelog
188 parents = cl.parentrevs 189 parents = cl.parentrevs
190
191 def get_ismerged(rev):
192 ctx = repo[rev]
193
194 def ismerged(path):
195 if path not in ctx.files():
196 return False
197 fctx = ctx[path]
198 parents = fctx._filelog.parents(fctx._filenode)
199 nb_parents = 0
200 for n in parents:
201 if n != node.nullid:
202 nb_parents += 1
203 return nb_parents >= 2
204
205 return ismerged
189 206
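A minimal standalone sketch of the rule implemented by the `get_ismerged` closure above: a file counts as merged in a revision when its filelog entry for that revision has two non-null parents. The constant and helper names below are assumptions for illustration, not the real `node` module.

NULLID = b"\0" * 20  # stand-in for node.nullid

def ismerged_from_filelog_parents(parents):
    # Count parents that are not the null node; two or more means a file merge.
    real_parents = [p for p in parents if p != NULLID]
    return len(real_parents) >= 2

assert not ismerged_from_filelog_parents((b"\x01" * 20, NULLID))    # linear change
assert ismerged_from_filelog_parents((b"\x01" * 20, b"\x02" * 20))  # merged file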
190 if repo.filecopiesmode == b'changeset-sidedata': 207 if repo.filecopiesmode == b'changeset-sidedata':
191 changelogrevision = cl.changelogrevision 208 changelogrevision = cl.changelogrevision
192 flags = cl.flags 209 flags = cl.flags
193 210
216 # time to save memory. 233 # time to save memory.
217 merge_caches = {} 234 merge_caches = {}
218 235
219 def revinfo(rev): 236 def revinfo(rev):
220 p1, p2 = parents(rev) 237 p1, p2 = parents(rev)
238 value = None
221 if flags(rev) & REVIDX_SIDEDATA: 239 if flags(rev) & REVIDX_SIDEDATA:
222 e = merge_caches.pop(rev, None) 240 e = merge_caches.pop(rev, None)
223 if e is not None: 241 if e is not None:
224 return e 242 return e
225 c = changelogrevision(rev) 243 c = changelogrevision(rev)
226 p1copies = c.p1copies 244 p1copies = c.p1copies
227 p2copies = c.p2copies 245 p2copies = c.p2copies
228 removed = c.filesremoved 246 removed = c.filesremoved
229 if p1 != node.nullrev and p2 != node.nullrev: 247 if p1 != node.nullrev and p2 != node.nullrev:
230 # XXX in some cases we over-cache, IGNORE 248 # XXX in some cases we over-cache, IGNORE
231 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed) 249 value = merge_caches[rev] = (
250 p1,
251 p2,
252 p1copies,
253 p2copies,
254 removed,
255 get_ismerged(rev),
256 )
232 else: 257 else:
233 p1copies = {} 258 p1copies = {}
234 p2copies = {} 259 p2copies = {}
235 removed = [] 260 removed = []
236 return p1, p2, p1copies, p2copies, removed 261
262 if value is None:
263 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
264 return value
237 265
238 else: 266 else:
239 267
240 def revinfo(rev): 268 def revinfo(rev):
241 p1, p2 = parents(rev) 269 p1, p2 = parents(rev)
242 ctx = repo[rev] 270 ctx = repo[rev]
243 p1copies, p2copies = ctx._copies 271 p1copies, p2copies = ctx._copies
244 removed = ctx.filesremoved() 272 removed = ctx.filesremoved()
245 return p1, p2, p1copies, p2copies, removed 273 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
246 274
247 return revinfo 275 return revinfo
248 276
249 277
250 def _changesetforwardcopies(a, b, match): 278 def _changesetforwardcopies(a, b, match):
254 repo = a.repo().unfiltered() 282 repo = a.repo().unfiltered()
255 children = {} 283 children = {}
256 revinfo = _revinfogetter(repo) 284 revinfo = _revinfogetter(repo)
257 285
258 cl = repo.changelog 286 cl = repo.changelog
287 isancestor = cl.isancestorrev # XXX we should add caching to this.
259 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()]) 288 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
260 mrset = set(missingrevs) 289 mrset = set(missingrevs)
261 roots = set() 290 roots = set()
262 for r in missingrevs: 291 for r in missingrevs:
263 for p in cl.parentrevs(r): 292 for p in cl.parentrevs(r):
281 iterrevs = set(from_head) 310 iterrevs = set(from_head)
282 iterrevs &= mrset 311 iterrevs &= mrset
283 iterrevs.update(roots) 312 iterrevs.update(roots)
284 iterrevs.remove(b.rev()) 313 iterrevs.remove(b.rev())
285 revs = sorted(iterrevs) 314 revs = sorted(iterrevs)
286 return _combinechangesetcopies(revs, children, b.rev(), revinfo, match) 315 return _combinechangesetcopies(
287 316 revs, children, b.rev(), revinfo, match, isancestor
288 317 )
289 def _combinechangesetcopies(revs, children, targetrev, revinfo, match): 318
319
320 def _combinechangesetcopies(
321 revs, children, targetrev, revinfo, match, isancestor
322 ):
290 """combine the copies information for each item of iterrevs 323 """combine the copies information for each item of iterrevs
291 324
292 revs: sorted iterable of revisions to visit 325 revs: sorted iterable of revisions to visit
293 children: a {parent: [children]} mapping. 326 children: a {parent: [children]} mapping.
294 targetrev: the final copies destination revision (not in iterrevs) 327 targetrev: the final copies destination revision (not in iterrevs)
303 copies = all_copies.pop(r, None) 336 copies = all_copies.pop(r, None)
304 if copies is None: 337 if copies is None:
305 # this is a root 338 # this is a root
306 copies = {} 339 copies = {}
307 for i, c in enumerate(children[r]): 340 for i, c in enumerate(children[r]):
308 p1, p2, p1copies, p2copies, removed = revinfo(c) 341 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
309 if r == p1: 342 if r == p1:
310 parent = 1 343 parent = 1
311 childcopies = p1copies 344 childcopies = p1copies
312 else: 345 else:
313 assert r == p2 346 assert r == p2
317 childcopies = { 350 childcopies = {
318 dst: src for dst, src in childcopies.items() if match(dst) 351 dst: src for dst, src in childcopies.items() if match(dst)
319 } 352 }
320 newcopies = copies 353 newcopies = copies
321 if childcopies: 354 if childcopies:
322 newcopies = _chain(newcopies, childcopies) 355 newcopies = copies.copy()
323 # _chain makes a copies, we can avoid doing so in some 356 for dest, source in pycompat.iteritems(childcopies):
324 # simple/linear cases. 357 prev = copies.get(source)
358 if prev is not None and prev[1] is not None:
359 source = prev[1]
360 newcopies[dest] = (c, source)
325 assert newcopies is not copies 361 assert newcopies is not copies
326 for f in removed: 362 for f in removed:
327 if f in newcopies: 363 if f in newcopies:
328 if newcopies is copies: 364 if newcopies is copies:
329 # copy on write to avoid affecting potential other 365 # copy on write to avoid affecting potential other
330 # branches. when there are no other branches, this 366 # branches. when there are no other branches, this
331 # could be avoided. 367 # could be avoided.
332 newcopies = copies.copy() 368 newcopies = copies.copy()
333 del newcopies[f] 369 newcopies[f] = (c, None)
334 othercopies = all_copies.get(c) 370 othercopies = all_copies.get(c)
335 if othercopies is None: 371 if othercopies is None:
336 all_copies[c] = newcopies 372 all_copies[c] = newcopies
337 else: 373 else:
338 # we are the second parent to work on c, we need to merge our 374 # we are the second parent to work on c, we need to merge our
339 # work with the other. 375 # work with the other.
340 # 376 #
341 # Unlike when copies are stored in the filelog, we consider
342 # it a copy even if the destination already existed on the
343 # other branch. It's simply too expensive to check if the
344 # file existed in the manifest.
345 #
346 # In case of conflict, parent 1 takes precedence over parent 2. 377 # In case of conflict, parent 1 takes precedence over parent 2.
347 # This is an arbitrary choice made anew when implementing 378 # This is an arbitrary choice made anew when implementing
348 # changeset based copies. It was made without regard to 379 # changeset based copies. It was made without regard to
349 # potential filelog related behavior. 380 # potential filelog related behavior.
350 if parent == 1: 381 if parent == 1:
351 othercopies.update(newcopies) 382 _merge_copies_dict(
383 othercopies, newcopies, isancestor, ismerged
384 )
352 else: 385 else:
353 newcopies.update(othercopies) 386 _merge_copies_dict(
387 newcopies, othercopies, isancestor, ismerged
388 )
354 all_copies[c] = newcopies 389 all_copies[c] = newcopies
355 return all_copies[targetrev] 390
391 final_copies = {}
392 for dest, (tt, source) in all_copies[targetrev].items():
393 if source is not None:
394 final_copies[dest] = source
395 return final_copies
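As a hedged illustration of the flattening step just above: the intermediate mapping now stores `(introducing_rev, source)` pairs, with `source = None` marking a rename cancelled by a later removal, and only entries with a real source are reported. The data below is made up for the example.

all_copies_at_target = {
    b'kept.txt':    (12, b'origin.txt'),  # live rename, reported to the caller
    b'deleted.txt': (15, None),           # file removed afterwards, rename dropped
}

final_copies = {
    dest: source
    for dest, (introducing_rev, source) in all_copies_at_target.items()
    if source is not None
}

assert final_copies == {b'kept.txt': b'origin.txt'}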
396
397
398 def _merge_copies_dict(minor, major, isancestor, ismerged):
399 """merge two copies-mapping together, minor and major
400
401 In case of conflict, value from "major" will be picked.
402
403 - `isancestor(low_rev, high_rev)`: a callable returning True if `low_rev` is an
404 ancestor of `high_rev`,
405
406 - `ismerged(path)`: a callable returning True if `path` has been merged in the
407 current revision,
408 """
409 for dest, value in major.items():
410 other = minor.get(dest)
411 if other is None:
412 minor[dest] = value
413 else:
414 new_tt = value[0]
415 other_tt = other[0]
416 if value[1] == other[1]:
417 continue
418 # content from "major" wins, unless it is older than the
419 # branch point and the file was not merged
420 if (
421 new_tt == other_tt
422 or not isancestor(new_tt, other_tt)
423 or ismerged(dest)
424 ):
425 minor[dest] = value
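The conflict rule above can be restated as a small self-contained sketch (simplified and assumed, not the actual function): the `major` entry replaces the `minor` one unless it was introduced in a strict ancestor of the `minor` entry's revision and the destination was not merged.

def keep_major(major_rev, minor_rev, isancestor, ismerged):
    # Mirrors the condition guarding `minor[dest] = value` above.
    return (
        major_rev == minor_rev
        or not isancestor(major_rev, minor_rev)
        or ismerged
    )

# Toy ancestry where revision 10 is an ancestor of revision 20.
def isancestor(low, high):
    return (low, high) == (10, 20)

assert keep_major(20, 10, isancestor, ismerged=False)      # fresher info wins
assert not keep_major(10, 20, isancestor, ismerged=False)  # older info yields to minor
assert keep_major(10, 20, isancestor, ismerged=True)       # a merge forces major to win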
356 426
357 427
358 def _forwardcopies(a, b, base=None, match=None): 428 def _forwardcopies(a, b, base=None, match=None):
359 """find {dst@b: src@a} copy mapping where a is an ancestor of b""" 429 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
360 430