copies: split up _chain() in naive chaining and filtering steps
authorMartin von Zweigbergk <martinvonz@google.com>
Thu, 09 May 2019 15:09:07 -0700
changeset 42373 f3d06d37e194
parent 42372 ba6ca4e80607
child 42374 65fa61ca20af
copies: split up _chain() in naive chaining and filtering steps The function now has two clearly defined steps. The first step is the actual chaining. This step is very cheap. The second step is filtering out invalid copies. This step is expensive. For changeset-centric copy tracing, I want to do the filtering step only at the end. This patch prepares for that. Differential Revision: https://phab.mercurial-scm.org/D6418
mercurial/copies.py
--- a/mercurial/copies.py	Fri May 24 09:24:47 2019 -0700
+++ b/mercurial/copies.py	Thu May 09 15:09:07 2019 -0700
@@ -107,8 +107,8 @@
     # This only occurs when a is a descendent of b or visa-versa.
     return min(limit, a, b)
 
-def _chain(src, dst, a, b):
-    """chain two sets of copies 'a' and 'b'"""
+def _chainandfilter(src, dst, a, b):
+    """chain two sets of copies 'a' and 'b' and filter result"""
 
     # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
     # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
@@ -123,31 +123,37 @@
     #   4   x   y   z   x->z
     #   5   -   x   y    -
     #   6   x   x   y   x->y
+    #
+    # _chain() takes care of chaining the copies in 'a' and 'b', but it
+    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
+    # between 5 and 6, so it includes all cases in its result.
+    # Cases 1, 3, and 5 are then removed by _filter().
 
-    # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
-    # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
-    # overwritten later, and case 3 will be removed later.
+    t = _chain(a, b)
+    _filter(src, dst, t)
+    return t
+
+def _filter(src, dst, t):
+    """filters out invalid copies after chaining"""
+    for k, v in list(t.items()):
+        # remove copies from files that didn't exist
+        if v not in src:
+            del t[k]
+        # remove criss-crossed copies
+        elif k in src and v in dst:
+            del t[k]
+        # remove copies to files that were then removed
+        elif k not in dst:
+            del t[k]
+
+def _chain(a, b):
+    """chain two sets of copies 'a' and 'b'"""
     t = a.copy()
     for k, v in b.iteritems():
         if v in t:
-            # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
             t[k] = t[v]
         else:
-            # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
             t[k] = v
-
-    for k, v in list(t.items()):
-        # remove copies from files that didn't exist, i.e. case 5
-        if v not in src:
-            del t[k]
-        # remove criss-crossed copies, i.e. case 3
-        elif k in src and v in dst:
-            del t[k]
-        # remove copies to files that were then removed, i.e. case 1
-        # and file 'y' in cases 3 & 4 (in case of rename)
-        elif k not in dst:
-            del t[k]
-
     return t
 
 def _tracefile(fctx, am, limit):
@@ -307,7 +313,7 @@
             if not match.always():
                 childcopies = {dst: src for dst, src in childcopies.items()
                                if match(dst)}
-            childcopies = _chain(a, childctx, copies, childcopies)
+            childcopies = _chainandfilter(a, childctx, copies, childcopies)
             heapq.heappush(work, (c, parent, childcopies))
     assert False
 
@@ -323,7 +329,7 @@
 
         cm = _committedforwardcopies(a, b.p1(), match)
         # combine copies from dirstate if necessary
-        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
+        return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
     return _committedforwardcopies(a, b, match)
 
 def _backwardrenames(a, b, match):
@@ -367,8 +373,8 @@
         return _backwardrenames(x, y, match=match)
     if debug:
         repo.ui.debug('debug.copies: search mode: combined\n')
-    return _chain(x, y, _backwardrenames(x, a, match=match),
-                  _forwardcopies(a, y, match=match))
+    return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
+                           _forwardcopies(a, y, match=match))
 
 def mergecopies(repo, c1, c2, base):
     """