41 yield (hashes[h], fctx) |
41 yield (hashes[h], fctx) |
42 |
42 |
43 # Done |
43 # Done |
44 repo.ui.progress(_('searching for exact renames'), None) |
44 repo.ui.progress(_('searching for exact renames'), None) |
45 |
45 |
|
@util.cachefunc
def _ctxdata(fctx):
    '''return (data, lines) for the given file context

    data is whatever fctx.data() returns and lines is that same content
    run through mdiff.splitnewlines(). The function is wrapped in
    util.cachefunc so the text is only loaded when first asked for
    (presumably once per fctx; confirm against util.cachefunc).
    '''
    content = fctx.data()
    pieces = mdiff.splitnewlines(content)
    return content, pieces
|
51 |
|
@util.cachefunc
def score(fctx1, fctx2):
    '''return a similarity ratio between two file contexts

    The ratio is twice the number of bytes in the lines of fctx2 that
    bdiff.blocks() reports as matching fctx1, divided by the combined
    length of both contents.

    NOTE(review): this divides by len(text) + len(orig), so it raises
    ZeroDivisionError when both contents are empty; presumably callers
    never pass two empty files -- confirm.
    '''
    newtext = fctx1.data()
    oldtext, oldlines = _ctxdata(fctx2)
    # bdiff.blocks() returns blocks of matching lines; the last two items
    # of each block index into the lines of the second text, so add up
    # the byte length of every line covered by those ranges
    matched = sum(len(line)
                  for _a1, _a2, b1, b2 in bdiff.blocks(newtext, oldtext)
                  for line in oldlines[b1:b2])

    combined = len(newtext) + len(oldtext)
    return matched * 2.0 / combined
|
66 |
46 def _findsimilarmatches(repo, added, removed, threshold): |
67 def _findsimilarmatches(repo, added, removed, threshold): |
47 '''find potentially renamed files based on similar file content |
68 '''find potentially renamed files based on similar file content |
48 |
69 |
49 Takes a list of new filectxs and a list of removed filectxs, and yields |
70 Takes a list of new filectxs and a list of removed filectxs, and yields |
50 (before, after, score) tuples of partial matches. |
71 (before, after, score) tuples of partial matches. |
52 copies = {} |
73 copies = {} |
53 for i, r in enumerate(removed): |
74 for i, r in enumerate(removed): |
54 repo.ui.progress(_('searching for similar files'), i, |
75 repo.ui.progress(_('searching for similar files'), i, |
55 total=len(removed), unit=_('files')) |
76 total=len(removed), unit=_('files')) |
56 |
77 |
57 # lazily load text |
|
58 @util.cachefunc |
|
59 def data(): |
|
60 orig = r.data() |
|
61 return orig, mdiff.splitnewlines(orig) |
|
62 |
|
63 def score(text): |
|
64 orig, lines = data() |
|
65 # bdiff.blocks() returns blocks of matching lines |
|
66 # count the number of bytes in each |
|
67 equal = 0 |
|
68 matches = bdiff.blocks(text, orig) |
|
69 for x1, x2, y1, y2 in matches: |
|
70 for line in lines[y1:y2]: |
|
71 equal += len(line) |
|
72 |
|
73 lengths = len(text) + len(orig) |
|
74 return equal * 2.0 / lengths |
|
75 |
|
76 for a in added: |
78 for a in added: |
77 bestscore = copies.get(a, (None, threshold))[1] |
79 bestscore = copies.get(a, (None, threshold))[1] |
78 myscore = score(a.data()) |
80 myscore = score(a, r) |
79 if myscore >= bestscore: |
81 if myscore >= bestscore: |
80 copies[a] = (r, myscore) |
82 copies[a] = (r, myscore) |
81 repo.ui.progress(_('searching'), None) |
83 repo.ui.progress(_('searching'), None) |
82 |
84 |
83 for dest, v in copies.iteritems(): |
85 for dest, v in copies.iteritems(): |