similar: do not look up and create filectx more than once
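Previously, each list comprehension indexed the context twice per file: once
for the size() check and once to produce the list element. The filectx is now
created once and empty files are filtered out afterwards by _dropempty().
Benchmark with 50k added/removed files, on tmpfs: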
$ hg addremove --dry-run --time -q
previous: real 16.070 secs (user 14.470+0.000 sys 1.580+0.000)
this patch: real 12.420 secs (user 11.120+0.000 sys 1.280+0.000)
--- a/mercurial/similar.py Thu Mar 23 21:10:45 2017 +0900
+++ b/mercurial/similar.py Thu Mar 23 21:17:08 2017 +0900
@@ -93,6 +93,9 @@
source, bscore = v
yield source, dest, bscore
+def _dropempty(fctxs):  # list of the given fctxs whose size() is > 0
+ return [x for x in fctxs if x.size() > 0]
+
def findrenames(repo, added, removed, threshold):
'''find renamed files -- yields (before, after, score) tuples'''
wctx = repo[None]
@@ -101,10 +104,8 @@
# Zero length files will be frequently unrelated to each other, and
# tracking the deletion/addition of such a file will probably cause more
# harm than good. We strip them out here to avoid matching them later on.
- addedfiles = [wctx[fp] for fp in sorted(added)
- if wctx[fp].size() > 0]
- removedfiles = [pctx[fp] for fp in sorted(removed)
- if fp in pctx and pctx[fp].size() > 0]
+ addedfiles = _dropempty(wctx[fp] for fp in sorted(added))
+ removedfiles = _dropempty(pctx[fp] for fp in sorted(removed) if fp in pctx)
# Find exact matches.
matchedfiles = set()