comparison mercurial/cmdutil.py @ 9926:4b044b81cb54

findrenames: refactor the score computation
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Tue, 24 Nov 2009 17:39:42 +0100
parents 9dfe34bf42c7
children 2ae4d0865629
comparison
legend: equal | deleted | inserted | replaced
9925:9dfe34bf42c7 9926:4b044b81cb54
274 ctx = repo['.'] 274 ctx = repo['.']
275 for r in removed: 275 for r in removed:
276 if r not in ctx: 276 if r not in ctx:
277 continue 277 continue
278 fctx = ctx.filectx(r) 278 fctx = ctx.filectx(r)
279 rr = fctx.data() 279 orig = fctx.data()
280 for a in added: 280
281 bestscore = copies.get(a, (None, threshold))[1] 281 def score(text):
282 aa = repo.wread(a) 282 if not len(text):
283 return 0.0
283 # bdiff.blocks() returns blocks of matching lines 284 # bdiff.blocks() returns blocks of matching lines
284 # count the number of bytes in each 285 # count the number of bytes in each
285 equal = 0 286 equal = 0
286 alines = mdiff.splitnewlines(aa) 287 alines = mdiff.splitnewlines(text)
287 matches = bdiff.blocks(aa, rr) 288 matches = bdiff.blocks(text, orig)
288 for x1,x2,y1,y2 in matches: 289 for x1,x2,y1,y2 in matches:
289 for line in alines[x1:x2]: 290 for line in alines[x1:x2]:
290 equal += len(line) 291 equal += len(line)
291 292
292 lengths = len(aa) + len(rr) 293 lengths = len(text) + len(orig)
293 if lengths: 294 return equal*2.0 / lengths
294 myscore = equal*2.0 / lengths 295
295 if myscore >= bestscore: 296 for a in added:
296 copies[a] = (r, myscore) 297 bestscore = copies.get(a, (None, threshold))[1]
298 myscore = score(repo.wread(a))
299 if myscore >= bestscore:
300 copies[a] = (r, myscore)
297 for dest, v in copies.iteritems(): 301 for dest, v in copies.iteritems():
298 source, score = v 302 source, score = v
299 yield source, dest, score 303 yield source, dest, score
300 304
301 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None): 305 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):