findrenames: first loop over the removed files, it's faster
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Tue, 24 Nov 2009 17:26:42 +0100
changeset 9925 9dfe34bf42c7
parent 9916 3d718761157b
child 9926 4b044b81cb54
findrenames: first loop over the removed files, it's faster

Getting the file from the working dir is less expensive than getting it
from the repo history, hence the speedup. With the removed files in the
outer loop, each removed file is fetched from history only once, and the
cheaper working-dir read is the one repeated in the inner loop.

Benchmarked on the crew repo with:

    rm -rf * ; hg up -C ; for i in `find . -name "*.py"` ; do mv $i $i.new;done

followed by:

    hg addremove -s 100

before: Time: real 68.760 secs (user 65.760+0.000 sys 2.490+0.000)
after : Time: real 28.890 secs (user 26.920+0.000 sys 1.450+0.000)
mercurial/cmdutil.py
--- a/mercurial/cmdutil.py	Tue Nov 24 16:07:36 2009 +0200
+++ b/mercurial/cmdutil.py	Tue Nov 24 17:26:42 2009 +0100
@@ -270,15 +270,16 @@
 
 def findrenames(repo, added, removed, threshold):
     '''find renamed files -- yields (before, after, score) tuples'''
+    copies = {}
     ctx = repo['.']
-    for a in added:
-        aa = repo.wread(a)
-        bestname, bestscore = None, threshold
-        for r in removed:
-            if r not in ctx:
-                continue
-            rr = ctx.filectx(r).data()
-
+    for r in removed:
+        if r not in ctx:
+            continue
+        fctx = ctx.filectx(r)
+        rr = fctx.data()
+        for a in added:
+            bestscore = copies.get(a, (None, threshold))[1]
+            aa = repo.wread(a)
             # bdiff.blocks() returns blocks of matching lines
             # count the number of bytes in each
             equal = 0
@@ -292,9 +293,10 @@
             if lengths:
                 myscore = equal*2.0 / lengths
                 if myscore >= bestscore:
-                    bestname, bestscore = r, myscore
-        if bestname:
-            yield bestname, a, bestscore
+                    copies[a] = (r, myscore)
+    for dest, v in copies.iteritems():
+        source, score = v
+        yield source, dest, score
 
 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
     if dry_run is None: