Mercurial > hg

--- a/mercurial/cmdutil.py	Sun Mar 04 09:03:21 2007 -0300
+++ b/mercurial/cmdutil.py	Sun Feb 18 20:39:25 2007 +0100
@@ -7,7 +7,7 @@

 from node import *
 from i18n import _
-import os, sys, mdiff, util, templater, patch
+import os, sys, mdiff, bdiff, util, templater, patch

 revrangesep = ':'

@@ -146,20 +146,29 @@
         yield src, fn, util.pathto(repo.getcwd(), fn), fn in exact

 def findrenames(repo, added=None, removed=None, threshold=0.5):
+    '''find renamed files -- yields (before, after, score) tuples'''
     if added is None or removed is None:
         added, removed = repo.status()[1:3]
     ctx = repo.changectx()
     for a in added:
         aa = repo.wread(a)
-        bestscore, bestname = None, None
+        bestname, bestscore = None, threshold
         for r in removed:
             rr = ctx.filectx(r).data()
-            delta = mdiff.textdiff(aa, rr)
-            if len(delta) < len(aa):
-                myscore = 1.0 - (float(len(delta)) / len(aa))
-                if bestscore is None or myscore > bestscore:
-                    bestscore, bestname = myscore, r
-        if bestname and bestscore >= threshold:
+
+            # bdiff.blocks() returns blocks of matching lines
+            # count the number of bytes in each
+            equal = 0
+            alines = mdiff.splitnewlines(aa)
+            matches = bdiff.blocks(aa, rr)
+            for x1,x2,y1,y2 in matches:
+                for line in alines[x1:x2]:
+                    equal += len(line)
+
+            myscore = equal*2.0 / (len(aa)+len(rr))
+            if myscore >= bestscore:
+                bestname, bestscore = r, myscore
+        if bestname:
             yield bestname, a, bestscore

 def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-addremove-similar	Sun Feb 18 20:39:25 2007 +0100
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+hg init rep; cd rep
+
+touch empty-file
+python -c 'for x in range(10000): print x' > large-file
+
+hg addremove
+
+hg commit -m A
+
+rm large-file empty-file
+python -c 'for x in range(10,10000): print x' > another-file
+
+hg addremove -s50
+
+hg commit -m B
+
+cd ..
+
+hg init rep2; cd rep2
+
+python -c 'for x in range(10000): print x' > large-file
+python -c 'for x in range(50): print x' > tiny-file
+
+hg addremove
+
+hg commit -m A
+
+python -c 'for x in range(70): print x' > small-file
+rm tiny-file
+rm large-file
+
+hg addremove -s50
+
+hg commit -m B
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-addremove-similar.out	Sun Feb 18 20:39:25 2007 +0100
@@ -0,0 +1,12 @@
+adding empty-file
+adding large-file
+adding another-file
+removing empty-file
+removing large-file
+recording removal of large-file as rename to another-file (99% similar)
+adding large-file
+adding tiny-file
+adding small-file
+removing large-file
+removing tiny-file
+recording removal of tiny-file as rename to small-file (82% similar)