Mercurial > hg-stable
changeset 45668:47ad23549b81
changing-files: add clean computation of changed file for merges
This is "a tad more complicated" than the previous cases. See inline
documentation for details (have fun).
Differential Revision: https://phab.mercurial-scm.org/D9128
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 29 Sep 2020 22:47:54 +0200 |
parents | 0303fc1f43f8 |
children | e53778ad64bf |
files | mercurial/metadata.py |
diffstat | 1 files changed, 215 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/metadata.py Tue Sep 29 22:46:29 2020 +0200 +++ b/mercurial/metadata.py Tue Sep 29 22:47:54 2020 +0200 @@ -1,3 +1,4 @@ +# coding: utf8 # metadata.py -- code related to various metadata computation and access. # # Copyright 2019 Google, Inc <martinvonz@google.com> @@ -238,18 +239,8 @@ elif p1.rev() == p2.rev(): # In the wild, one can encounter such "non-merge" return _process_linear(p1, ctx) - filescopies = computechangesetcopies(ctx) - filesadded = computechangesetfilesadded(ctx) - filesremoved = computechangesetfilesremoved(ctx) - filesmerged = computechangesetfilesmerged(ctx) - files = ChangingFiles() - files.update_touched(ctx.files()) - files.update_added(filesadded) - files.update_removed(filesremoved) - files.update_merged(filesmerged) - files.update_copies_from_p1(filescopies[0]) - files.update_copies_from_p2(filescopies[1]) - return files + else: + return _process_merge(p1, p2, ctx) def _process_root(ctx): @@ -301,6 +292,218 @@ return md +def _process_merge(p1_ctx, p2_ctx, ctx): + """compute the appropriate changed files for a changeset with two parents + + This is a more advance case. The information we need to record is summarise + in the following table: + + ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐ + │ diff ╲ diff │ ø │ (Some, None) │ (None, Some) │ (Some, Some) │ + │ p2 ╲ p1 │ │ │ │ │ + ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ + │ │ │🄱 No Changes │🄳 No Changes │ │ + │ ø │🄰 No Changes │ OR │ OR │🄵 No Changes │ + │ │ │🄲 Deleted[1] │🄴 Salvaged[2]│ [3] │ + ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ + │ │🄶 No Changes │ │ │ │ + │ (Some, None) │ OR │🄻 Deleted │ ø │ ø │ + │ │🄷 Deleted[1] │ │ │ │ + ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ + │ │🄸 No Changes │ │ │ │ + │ (None, Some) │ OR │ ø │🄼 Added │🄽 Merged │ + │ │🄹 Salvaged[2]│ │ (copied?) │ (copied?) │ + ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ + │ │ │ │ │ │ + │ (Some, Some) │🄺 No Changes │ ø │🄾 Merged │🄿 Merged │ + │ │ [3] │ │ (copied?) │ (copied?) │ + └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘ + + Special case [1]: + + The situation is: + - parent-A: file exists, + - parent-B: no file, + - working-copy: no file. + + Detecting a "deletion" will depend on the presence of actual change on + the "parent-A" branch: + + Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged + compared to the merge ancestors, then parent-A branch left the file + untouched while parent-B deleted it. We simply apply the change from + "parent-B" branch the file was automatically dropped. + The result is: + - file is not recorded as touched by the merge. + + Subcase 🄲 or 🄷 : otherwise, the change from parent-A branch were explicitly dropped and + the file was "deleted again". From a user perspective, the message + about "locally changed" while "remotely deleted" (or the other way + around) was issued and the user chose to deleted the file. + The result: + - file is recorded as touched by the merge. + + + Special case [2]: + + The situation is: + - parent-A: no file, + - parent-B: file, + - working-copy: file (same content as parent-B). + + There are three subcases depending on the ancestors contents: + + - A) the file is missing in all ancestors, + - B) at least one ancestor has the file with filenode ≠ from parent-B, + - C) all ancestors use the same filenode as parent-B, + + Subcase (A) is the simpler, nothing happend on parent-A side while + parent-B added it. + + The result: + - the file is not marked as touched by the merge. + + Subcase (B) is the counter part of "Special case [1]", the file was + modified on parent-B side, while parent-A side deleted it. However this + time, the conflict was solved by keeping the file (and its + modification). We consider the file as "salvaged". + + The result: + - the file is marked as "salvaged" by the merge. + + Subcase (C) is subtle variation of the case above. In this case, the + file in unchanged on the parent-B side and actively removed on the + parent-A side. So the merge machinery correctly decide it should be + removed. However, the file was explicitly restored to its parent-B + content before the merge was commited. The file is be marked + as salvaged too. From the merge result perspective, this is similar to + Subcase (B), however from the merge resolution perspective they differ + since in (C), there was some conflict not obvious solution to the + merge (That got reversed) + + Special case [3]: + + The situation is: + - parent-A: file, + - parent-B: file (different filenode as parent-A), + - working-copy: file (same filenode as parent-B). + + This case is in theory much simple, for this to happens, this mean the + filenode in parent-A is purely replacing the one in parent-B (either a + descendant, or a full new file history, see changeset). So the merge + introduce no changes, and the file is not affected by the merge... + + However, in the wild it is possible to find commit with the above is not + True. For example repository have some commit where the *new* node is an + ancestor of the node in parent-A, or where parent-A and parent-B are two + branches of the same file history, yet not merge-filenode were created + (while the "merge" should have led to a "modification"). + + Detecting such cases (and not recording the file as modified) would be a + nice bonus. However do not any of this yet. + """ + + md = ChangingFiles() + + m = ctx.manifest() + p1m = p1_ctx.manifest() + p2m = p2_ctx.manifest() + diff_p1 = p1m.diff(m) + diff_p2 = p2m.diff(m) + + cahs = ctx.repo().changelog.commonancestorsheads( + p1_ctx.node(), p2_ctx.node() + ) + if not cahs: + cahs = [node.nullrev] + mas = [ctx.repo()[r].manifest() for r in cahs] + + copy_candidates = [] + + # Dealing with case 🄰 happens automatically. Since there are no entry in + # d1 nor d2, we won't iterate on it ever. + + # Iteration over d1 content will deal with all cases, but the one in the + # first column of the table. + for filename, d1 in diff_p1.items(): + + d2 = diff_p2.pop(filename, None) + + if d2 is None: + # this deal with the first line of the table. + _process_other_unchanged(md, mas, filename, d1) + else: + + if d1[0][0] is None and d2[0][0] is None: + # case 🄼 — both deleted the file. + md.mark_added(filename) + copy_candidates.append(filename) + elif d1[1][0] is None and d2[1][0] is None: + # case 🄻 — both deleted the file. + md.mark_removed(filename) + elif d1[1][0] is not None and d2[1][0] is not None: + # case 🄽 🄾 🄿 + md.mark_merged(filename) + copy_candidates.append(filename) + else: + # Impossible case, the post-merge file status cannot be None on + # one side and Something on the other side. + assert False, "unreachable" + + # Iteration over remaining d2 content deal with the first column of the + # table. + for filename, d2 in diff_p2.items(): + _process_other_unchanged(md, mas, filename, d2) + + for filename in copy_candidates: + copy_info = ctx[filename].renamed() + if copy_info: + source, srcnode = copy_info + if source in p1_ctx and p1_ctx[source].filenode() == srcnode: + md.mark_copied_from_p1(source, filename) + elif source in p2_ctx and p2_ctx[source].filenode() == srcnode: + md.mark_copied_from_p2(source, filename) + return md + + +def _find(manifest, filename): + """return the associate filenode or None""" + if filename not in manifest: + return None + return manifest.find(filename)[0] + + +def _process_other_unchanged(md, mas, filename, diff): + source_node = diff[0][0] + target_node = diff[1][0] + + if source_node is not None and target_node is None: + if any(not _find(ma, filename) == source_node for ma in mas): + # case 🄲 of 🄷 + md.mark_removed(filename) + # else, we have case 🄱 or 🄶 : no change need to be recorded + elif source_node is None and target_node is not None: + if any(_find(ma, filename) is not None for ma in mas): + # case 🄴 or 🄹 + md.mark_salvaged(filename) + # else, we have case 🄳 or 🄸 : simple merge without intervention + elif source_node is not None and target_node is not None: + # case 🄵 or 🄺 : simple merge without intervention + # + # In buggy case where source_node is not an ancestors of target_node. + # There should have a been a new filenode created, recording this as + # "modified". We do not deal with them yet. + pass + else: + # An impossible case, the diff algorithm should not return entry if the + # file is missing on both side. + assert False, "unreachable" + + +def _missing_from_all_ancestors(mas, filename): + return all(_find(ma, filename) is None for ma in mas) + + def computechangesetfilesadded(ctx): """return the list of files added in a changeset """