comparison mercurial/similar.py @ 11059:ef4aa90b1e58

Move 'findrenames' code into its own file. The next few patches will increase the size of the "findrenames" functionality. This patch simply moves the function into its own file to avoid clutter building up in 'cmdutil.py'.
author David Greenaway <hg-dev@davidgreenaway.com>
date Sat, 03 Apr 2010 11:58:16 +1100
parents
children e6df01776e08
comparison
equal deleted inserted replaced
11058:f6dcbeb5babe 11059:ef4aa90b1e58
1 # similar.py - mechanisms for finding similar files
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from i18n import _
9 import util
10 import mdiff
11 import bdiff
12
def findrenames(repo, added, removed, threshold):
    '''find renamed files -- yields (before, after, score) tuples

    Every name in ``removed`` that exists in ``repo['.']`` is compared
    against the working-copy contents of every name in ``added``.  For
    each added file, the removed file with the highest score at or
    above ``threshold`` is remembered; on a tie the removed file seen
    later replaces the earlier one.

    Scores are floats:

    - 0.0 when the added file is empty;
    - 1.0 when the removed file's ``cmp()`` reports no difference;
    - 0.0 for any other pair when ``threshold`` is exactly 1.0 (the
      diff is skipped);
    - otherwise twice the number of bytes in lines that
      ``bdiff.blocks()`` reports as matching, divided by the combined
      length of both texts.

    Progress is reported through ``repo.ui.progress`` once per removed
    file and cleared when the search finishes.  Nothing runs until the
    returned generator is first advanced.
    '''
    winners = {}
    parent = repo['.']
    for pos, oldname in enumerate(removed):
        repo.ui.progress(_('searching'), pos, total=len(removed))
        if oldname not in parent:
            continue
        oldfctx = parent.filectx(oldname)

        # the old text (and its split lines) is fetched on first use
        # only, then remembered for the rest of this iteration
        def loadold():
            content = oldfctx.data()
            return content, mdiff.splitnewlines(content)
        loadold = util.cachefunc(loadold)

        def similarity(text):
            if len(text) == 0:
                return 0.0
            if not oldfctx.cmp(text):
                return 1.0
            if threshold == 1.0:
                # only exact matches can qualify; skip the diff
                return 0.0
            oldtext, oldlines = loadold()
            # bdiff.blocks() returns blocks of matching lines;
            # add up the bytes of the old lines inside each block
            matched = sum(len(line)
                          for x1, x2, y1, y2 in bdiff.blocks(text, oldtext)
                          for line in oldlines[y1:y2])
            return matched * 2.0 / (len(text) + len(oldtext))

        for newname in added:
            if newname in winners:
                floor = winners[newname][1]
            else:
                floor = threshold
            current = similarity(repo.wread(newname))
            if current >= floor:
                winners[newname] = (oldname, current)
    repo.ui.progress(_('searching'), None)

    for dest, (source, rating) in winners.iteritems():
        yield source, dest, rating
58
59