Mercurial > hg
changeset 30805:0ae287eb6a4f
similar: move score function to module level
Future patches will use this to report the similarity of a rename / copy
in the patch output.
author | Sean Farley <sean@farley.io> |
---|---|
date | Sat, 07 Jan 2017 20:47:57 -0800 |
parents | 4227f80f72b2 |
children | e2796f193f06 |
files | mercurial/similar.py |
diffstat | 1 files changed, 22 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/similar.py Mon Jan 09 17:58:19 2017 +0900
+++ b/mercurial/similar.py Sat Jan 07 20:47:57 2017 -0800
@@ -43,6 +43,27 @@
     # Done
     repo.ui.progress(_('searching for exact renames'), None)
 
+@util.cachefunc
+def _ctxdata(fctx):
+    # lazily load text
+    orig = fctx.data()
+    return orig, mdiff.splitnewlines(orig)
+
+@util.cachefunc
+def score(fctx1, fctx2):
+    text = fctx1.data()
+    orig, lines = _ctxdata(fctx2)
+    # bdiff.blocks() returns blocks of matching lines
+    # count the number of bytes in each
+    equal = 0
+    matches = bdiff.blocks(text, orig)
+    for x1, x2, y1, y2 in matches:
+        for line in lines[y1:y2]:
+            equal += len(line)
+
+    lengths = len(text) + len(orig)
+    return equal * 2.0 / lengths
+
 def _findsimilarmatches(repo, added, removed, threshold):
     '''find potentially renamed files based on similar file content
 
@@ -54,28 +75,9 @@
         repo.ui.progress(_('searching for similar files'), i,
                          total=len(removed), unit=_('files'))
 
-        # lazily load text
-        @util.cachefunc
-        def data():
-            orig = r.data()
-            return orig, mdiff.splitnewlines(orig)
-
-        def score(text):
-            orig, lines = data()
-            # bdiff.blocks() returns blocks of matching lines
-            # count the number of bytes in each
-            equal = 0
-            matches = bdiff.blocks(text, orig)
-            for x1, x2, y1, y2 in matches:
-                for line in lines[y1:y2]:
-                    equal += len(line)
-
-            lengths = len(text) + len(orig)
-            return equal * 2.0 / lengths
-
         for a in added:
             bestscore = copies.get(a, (None, threshold))[1]
-            myscore = score(a.data())
+            myscore = score(a, r)
             if myscore >= bestscore:
                 copies[a] = (r, myscore)
     repo.ui.progress(_('searching'), None)