similar: move score function to module level
Future patches will use this to report the similarity of a rename / copy
in the patch output.
--- a/mercurial/similar.py Mon Jan 09 17:58:19 2017 +0900
+++ b/mercurial/similar.py Sat Jan 07 20:47:57 2017 -0800
@@ -43,6 +43,27 @@
# Done
repo.ui.progress(_('searching for exact renames'), None)
+@util.cachefunc
+def _ctxdata(fctx):
+ # lazily load text
+ orig = fctx.data()
+ return orig, mdiff.splitnewlines(orig)
+
+@util.cachefunc
+def score(fctx1, fctx2):
+ text = fctx1.data()
+ orig, lines = _ctxdata(fctx2)
+ # bdiff.blocks() returns blocks of matching lines
+ # count the number of bytes in each
+ equal = 0
+ matches = bdiff.blocks(text, orig)
+ for x1, x2, y1, y2 in matches:
+ for line in lines[y1:y2]:
+ equal += len(line)
+
+ lengths = len(text) + len(orig)
+ return equal * 2.0 / lengths
+
def _findsimilarmatches(repo, added, removed, threshold):
'''find potentially renamed files based on similar file content
@@ -54,28 +75,9 @@
repo.ui.progress(_('searching for similar files'), i,
total=len(removed), unit=_('files'))
- # lazily load text
- @util.cachefunc
- def data():
- orig = r.data()
- return orig, mdiff.splitnewlines(orig)
-
- def score(text):
- orig, lines = data()
- # bdiff.blocks() returns blocks of matching lines
- # count the number of bytes in each
- equal = 0
- matches = bdiff.blocks(text, orig)
- for x1, x2, y1, y2 in matches:
- for line in lines[y1:y2]:
- equal += len(line)
-
- lengths = len(text) + len(orig)
- return equal * 2.0 / lengths
-
for a in added:
bestscore = copies.get(a, (None, threshold))[1]
- myscore = score(a.data())
+ myscore = score(a, r)
if myscore >= bestscore:
copies[a] = (r, myscore)
repo.ui.progress(_('searching'), None)