Mercurial > hg
changeset 9926:4b044b81cb54
findrenames: refactor the score computation
author | Benoit Boissinot <benoit.boissinot@ens-lyon.org> |
---|---|
date | Tue, 24 Nov 2009 17:39:42 +0100 |
parents | 9dfe34bf42c7 |
children | 2ae4d0865629 |
files | mercurial/cmdutil.py |
diffstat | 1 files changed, 15 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/mercurial/cmdutil.py Tue Nov 24 17:26:42 2009 +0100
+++ b/mercurial/cmdutil.py Tue Nov 24 17:39:42 2009 +0100
@@ -276,24 +276,28 @@
         if r not in ctx:
             continue
         fctx = ctx.filectx(r)
-        rr = fctx.data()
-        for a in added:
-            bestscore = copies.get(a, (None, threshold))[1]
-            aa = repo.wread(a)
+        orig = fctx.data()
+
+        def score(text):
+            if not len(text):
+                return 0.0
             # bdiff.blocks() returns blocks of matching lines
             # count the number of bytes in each
             equal = 0
-            alines = mdiff.splitnewlines(aa)
-            matches = bdiff.blocks(aa, rr)
+            alines = mdiff.splitnewlines(text)
+            matches = bdiff.blocks(text, orig)
             for x1,x2,y1,y2 in matches:
                 for line in alines[x1:x2]:
                     equal += len(line)
 
-            lengths = len(aa) + len(rr)
-            if lengths:
-                myscore = equal*2.0 / lengths
-                if myscore >= bestscore:
-                    copies[a] = (r, myscore)
+            lengths = len(text) + len(orig)
+            return equal*2.0 / lengths
+
+        for a in added:
+            bestscore = copies.get(a, (None, threshold))[1]
+            myscore = score(repo.wread(a))
+            if myscore >= bestscore:
+                copies[a] = (r, myscore)
     for dest, v in copies.iteritems():
         source, score = v
         yield source, dest, score
```