293 repo.ui.progress(_('looking for similarities'), i, total=len(removed)) |
293 repo.ui.progress(_('looking for similarities'), i, total=len(removed)) |
294 if r not in ctx: |
294 if r not in ctx: |
295 continue |
295 continue |
296 fctx = ctx.filectx(r) |
296 fctx = ctx.filectx(r) |
297 |
297 |
|
# lazily load text
@util.cachefunc
def data():
    """Return ``(orig, lines)`` for the file context ``fctx``.

    ``orig`` is the result of ``fctx.data()`` and ``lines`` is that same
    content passed through ``mdiff.splitnewlines()``.

    The function is wrapped in ``util.cachefunc`` so that -- presumably --
    the content is fetched and split only on the first call and reused
    afterwards (confirm against ``util.cachefunc``).
    """
    orig = fctx.data()
    return orig, mdiff.splitnewlines(orig)
|
303 |
def score(text):
    """Return a similarity ratio in ``[0.0, 1.0]`` between ``text`` and ``fctx``.

    ``text`` is the candidate content; ``fctx``, ``threshold`` and the
    ``data()`` helper come from the enclosing scope.

    Short-circuits, in order:
    * empty ``text`` scores ``0.0``;
    * a falsy ``fctx.cmp(text)`` scores ``1.0`` (presumably "no difference");
    * ``threshold == 1.0`` scores ``0.0`` without diffing (presumably because
      only an exact match could reach that threshold -- confirm with caller).

    Otherwise the score is twice the number of bytes in matching lines,
    divided by the combined length of both texts.
    """
    if not len(text):
        return 0.0
    if not fctx.cmp(text):
        return 1.0
    if threshold == 1.0:
        return 0.0

    # Original content and its pre-split lines come from the data() helper.
    orig, lines = data()

    # bdiff.blocks() returns blocks of matching lines
    # count the number of bytes in each
    equal = 0
    matches = bdiff.blocks(text, orig)
    for x1, x2, y1, y2 in matches:
        # y1:y2 index into the second argument (orig); the x range is unused.
        for line in lines[y1:y2]:
            equal += len(line)

    lengths = len(text) + len(orig)
    return equal * 2.0 / lengths
317 |
322 |