addremove: add -s/--similarity option
progress on issue 295.
--- a/mercurial/cmdutil.py Fri Aug 18 21:18:01 2006 -0700
+++ b/mercurial/cmdutil.py Fri Aug 18 22:13:58 2006 -0700
@@ -8,7 +8,7 @@
from demandload import demandload
from node import *
from i18n import gettext as _
-demandload(globals(), 'util')
+demandload(globals(), 'mdiff util')
demandload(globals(), 'os sys')
def make_filename(repo, pat, node,
@@ -93,19 +93,88 @@
     for r in results:
         yield r
 
-def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None):
+def findrenames(repo, added=None, removed=None, threshold=0.5):
+    """Guess which added files are renames of removed files.
+
+    The content of each added file (as returned by repo.wread) is
+    diffed against every removed file's revision listed in the
+    manifest of the first dirstate parent.  A pair scores
+    1 - len(delta) / len(added content); for each added file the
+    best-scoring removed file is reported if its score is at least
+    threshold.
+
+    added and removed default to repo.status()[1:3].  Yields
+    (removed name, added name, score) tuples.
+    """
+    if added is None or removed is None:
+        added, removed = repo.status()[1:3]
+    changes = repo.changelog.read(repo.dirstate.parents()[0])
+    mf = repo.manifest.read(changes[0])
+    # removed-file contents, read at most once each rather than once
+    # per added file
+    olddata = {}
+    for a in added:
+        aa = repo.wread(a)
+        bestscore, bestname = None, None
+        for r in removed:
+            if r not in mf:
+                # no revision in the parent manifest to compare with
+                # (presumably a file added but never committed)
+                continue
+            if r not in olddata:
+                olddata[r] = repo.file(r).read(mf[r])
+            delta = mdiff.textdiff(aa, olddata[r])
+            # only a delta smaller than the file counts as similar; the
+            # test also keeps an empty file from dividing by zero
+            if len(delta) < len(aa):
+                myscore = 1.0 - (float(len(delta)) / len(aa))
+                if bestscore is None or myscore > bestscore:
+                    bestscore, bestname = myscore, r
+        if bestname and bestscore >= threshold:
+            yield bestname, a, bestscore
+
+def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None,
+              similarity=None):
+    """Add new files and remove missing ones, optionally finding renames.
+
+    Files come from walk(repo, pats, opts).  One with source 'f' and
+    dirstate state '?' is added; one whose state is not 'r' and whose
+    path no longer exists is removed.  dry_run and similarity fall
+    back to opts when None.  With dry_run the repo.add, repo.remove
+    and repo.copy calls are skipped.  With similarity > 0, each pair
+    yielded by findrenames() at that threshold goes to repo.copy().
+    """
     if dry_run is None:
         dry_run = opts.get('dry_run')
+    if similarity is None:
+        # NOTE(review): used as-is, i.e. on the same scale as
+        # findrenames() scores (at most 1.0) -- confirm callers do not
+        # put a percentage in opts['similarity']
+        similarity = float(opts.get('similarity') or 0)
     add, remove = [], []
+    # abs name -> (rel, exact), looked up for the rename messages
+    mapping = {}
     for src, abs, rel, exact in walk(repo, pats, opts):
         if src == 'f' and repo.dirstate.state(abs) == '?':
             add.append(abs)
+            mapping[abs] = rel, exact
             if repo.ui.verbose or not exact:
                 repo.ui.status(_('adding %s\n') % ((pats and rel) or abs))
         if repo.dirstate.state(abs) != 'r' and not os.path.exists(rel):
             remove.append(abs)
+            mapping[abs] = rel, exact
             if repo.ui.verbose or not exact:
                 repo.ui.status(_('removing %s\n') % ((pats and rel) or abs))
     if not dry_run:
         repo.add(add, wlock=wlock)
         repo.remove(remove, wlock=wlock)
+    if similarity > 0:
+        for old, new, score in findrenames(repo, add, remove, similarity):
+            oldrel, oldexact = mapping[old]
+            newrel, newexact = mapping[new]
+            if repo.ui.verbose or not oldexact or not newexact:
+                repo.ui.status(_('recording removal of %s as rename to %s '
+                                 '(%d%% similar)\n') %
+                               (oldrel, newrel, score * 100))
+            if not dry_run:
+                repo.copy(old, new, wlock=wlock)
--- a/mercurial/commands.py Fri Aug 18 21:18:01 2006 -0700
+++ b/mercurial/commands.py Fri Aug 18 22:13:58 2006 -0700
@@ -658,8 +658,17 @@
New files are ignored if they match any of the patterns in .hgignore. As
with add, these changes take effect at the next commit.
+
+ Use the -s option to detect renamed files. With a parameter > 0,
+ this compares every removed file with every added file and records
+ those similar enough as renames. This option takes a percentage
+ between 0 (disabled) and 100 (files must be identical) as its
+ parameter. Detecting renamed files this way can be expensive.
"""
- return cmdutil.addremove(repo, pats, opts)
+ sim = float(opts.get('similarity') or 0)
+ if sim < 0 or sim > 100:
+ raise util.Abort(_('similarity must be between 0 and 100'))
+ return cmdutil.addremove(repo, pats, opts, similarity=sim/100.)
def annotate(ui, repo, *pats, **opts):
"""show changeset information per file line
@@ -2747,7 +2756,10 @@
(addremove,
[('I', 'include', [], _('include names matching the given patterns')),
('X', 'exclude', [], _('exclude names matching the given patterns')),
- ('n', 'dry-run', None, _('do not perform actions, just print output'))],
+ ('n', 'dry-run', None,
+ _('do not perform actions, just print output')),
+ ('s', 'similarity', '',
+ _('guess renamed files by similarity (0<=s<=100)'))],
_('hg addremove [OPTION]... [FILE]...')),
"^annotate":
(annotate,
--- a/tests/test-addremove Fri Aug 18 21:18:01 2006 -0700
+++ b/tests/test-addremove Fri Aug 18 22:13:58 2006 -0700
@@ -10,3 +10,17 @@
touch ../foo_2 bar_2
hg -v addremove
hg -v commit -m "add 2" -d "1000000 0"
+
+cd ..
+hg init sim
+cd sim
+echo a > a
+echo a >> a
+echo a >> a
+echo c > c
+hg commit -Ama
+mv a b
+rm c
+echo d > d
+hg addremove -s 50 # a percentage: addremove accepts 0 to 100
+hg commit -mb
--- a/tests/test-addremove.out Fri Aug 18 21:18:01 2006 -0700
+++ b/tests/test-addremove.out Fri Aug 18 22:13:58 2006 -0700
@@ -6,3 +6,10 @@
adding foo_2
dir/bar_2
foo_2
+adding a
+adding c
+adding b
+adding d
+removing a
+removing c
+recording removal of a as rename to b (100% similar)