Mercurial > hg

--- a/mercurial/similar.py	Mon Jan 09 17:58:19 2017 +0900
+++ b/mercurial/similar.py	Sat Jan 07 20:47:57 2017 -0800
@@ -43,6 +43,27 @@
     # Done
     repo.ui.progress(_('searching for exact renames'), None)

+@util.cachefunc
+def _ctxdata(fctx):
+    # lazily load text
+    orig = fctx.data()
+    return orig, mdiff.splitnewlines(orig)
+
+@util.cachefunc
+def score(fctx1, fctx2):
+    text = fctx1.data()
+    orig, lines = _ctxdata(fctx2)
+    # bdiff.blocks() returns blocks of matching lines
+    # count the number of bytes in each
+    equal = 0
+    matches = bdiff.blocks(text, orig)
+    for x1, x2, y1, y2 in matches:
+        for line in lines[y1:y2]:
+            equal += len(line)
+
+    lengths = len(text) + len(orig)
+    return equal * 2.0 / lengths
+
 def _findsimilarmatches(repo, added, removed, threshold):
     '''find potentially renamed files based on similar file content

@@ -54,28 +75,9 @@
         repo.ui.progress(_('searching for similar files'), i,
                          total=len(removed), unit=_('files'))

-        # lazily load text
-        @util.cachefunc
-        def data():
-            orig = r.data()
-            return orig, mdiff.splitnewlines(orig)
-
-        def score(text):
-            orig, lines = data()
-            # bdiff.blocks() returns blocks of matching lines
-            # count the number of bytes in each
-            equal = 0
-            matches = bdiff.blocks(text, orig)
-            for x1, x2, y1, y2 in matches:
-                for line in lines[y1:y2]:
-                    equal += len(line)
-
-            lengths = len(text) + len(orig)
-            return equal * 2.0 / lengths
-
         for a in added:
             bestscore = copies.get(a, (None, threshold))[1]
-            myscore = score(a.data())
+            myscore = score(a, r)
             if myscore >= bestscore:
                 copies[a] = (r, myscore)
     repo.ui.progress(_('searching'), None)