changeset 44941:edd08aa193fb

files: extract code for extra filtering of the `removed` entry into copies We want to reduce the set of `removed` files that to the set of files actually removed. That `removed` set is used as of the changeset centric algorithm, having smaller sets means less processing and faster computation. In this changeset we extract the code to be a function of it own. We will make use of it in the next changesets. Differential Revision: https://phab.mercurial-scm.org/D8588
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 27 May 2020 12:45:39 +0200
parents 4c1d39215034
children 25512a65cefd
files mercurial/localrepo.py mercurial/metadata.py
diffstat 2 files changed, 59 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/localrepo.py	Wed May 27 12:26:08 2020 +0200
+++ b/mercurial/localrepo.py	Wed May 27 12:45:39 2020 +0200
@@ -46,6 +46,7 @@
     match as matchmod,
     mergestate as mergestatemod,
     mergeutil,
+    metadata,
     namespaces,
     narrowspec,
     obsolete,
@@ -3145,51 +3146,8 @@
                 for f in drop:
                     del m[f]
                 if p2.rev() != nullrev:
-
-                    @util.cachefunc
-                    def mas():
-                        p1n = p1.node()
-                        p2n = p2.node()
-                        cahs = self.changelog.commonancestorsheads(p1n, p2n)
-                        if not cahs:
-                            cahs = [nullrev]
-                        return [self[r].manifest() for r in cahs]
-
-                    def deletionfromparent(f):
-                        # When a file is removed relative to p1 in a merge, this
-                        # function determines whether the absence is due to a
-                        # deletion from a parent, or whether the merge commit
-                        # itself deletes the file. We decide this by doing a
-                        # simplified three way merge of the manifest entry for
-                        # the file. There are two ways we decide the merge
-                        # itself didn't delete a file:
-                        # - neither parent (nor the merge) contain the file
-                        # - exactly one parent contains the file, and that
-                        #   parent has the same filelog entry as the merge
-                        #   ancestor (or all of them if there two). In other
-                        #   words, that parent left the file unchanged while the
-                        #   other one deleted it.
-                        # One way to think about this is that deleting a file is
-                        # similar to emptying it, so the list of changed files
-                        # should be similar either way. The computation
-                        # described above is not done directly in _filecommit
-                        # when creating the list of changed files, however
-                        # it does something very similar by comparing filelog
-                        # nodes.
-                        if f in m1:
-                            return f not in m2 and all(
-                                f in ma and ma.find(f) == m1.find(f)
-                                for ma in mas()
-                            )
-                        elif f in m2:
-                            return all(
-                                f in ma and ma.find(f) == m2.find(f)
-                                for ma in mas()
-                            )
-                        else:
-                            return True
-
-                    removed = [f for f in removed if not deletionfromparent(f)]
+                    rf = metadata.get_removal_filter(ctx, (p1, p2, m1, m2))
+                    removed = [f for f in removed if not rf(f)]
 
                 files = changed + removed
                 md = None
--- a/mercurial/metadata.py	Wed May 27 12:26:08 2020 +0200
+++ b/mercurial/metadata.py	Wed May 27 12:45:39 2020 +0200
@@ -11,6 +11,7 @@
 
 from . import (
     error,
+    node,
     pycompat,
     util,
 )
@@ -31,6 +32,61 @@
     return added
 
 
+def get_removal_filter(ctx, x=None):
+    """return a function to detect files "wrongly" detected as `removed`
+
+    When a file is removed relative to p1 in a merge, this
+    function determines whether the absence is due to a
+    deletion from a parent, or whether the merge commit
+    itself deletes the file. We decide this by doing a
+    simplified three way merge of the manifest entry for
+    the file. There are two ways we decide the merge
+    itself didn't delete a file:
+    - neither parent (nor the merge) contain the file
+    - exactly one parent contains the file, and that
+      parent has the same filelog entry as the merge
+      ancestor (or all of them if there two). In other
+      words, that parent left the file unchanged while the
+      other one deleted it.
+    One way to think about this is that deleting a file is
+    similar to emptying it, so the list of changed files
+    should be similar either way. The computation
+    described above is not done directly in _filecommit
+    when creating the list of changed files, however
+    it does something very similar by comparing filelog
+    nodes.
+    """
+
+    if x is not None:
+        p1, p2, m1, m2 = x
+    else:
+        p1 = ctx.p1()
+        p2 = ctx.p2()
+        m1 = p1.manifest()
+        m2 = p2.manifest()
+
+    @util.cachefunc
+    def mas():
+        p1n = p1.node()
+        p2n = p2.node()
+        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
+        if not cahs:
+            cahs = [node.nullrev]
+        return [ctx.repo()[r].manifest() for r in cahs]
+
+    def deletionfromparent(f):
+        if f in m1:
+            return f not in m2 and all(
+                f in ma and ma.find(f) == m1.find(f) for ma in mas()
+            )
+        elif f in m2:
+            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
+        else:
+            return True
+
+    return deletionfromparent
+
+
 def computechangesetfilesremoved(ctx):
     """return the list of files removed in a changeset
     """