# HG changeset patch
# User Yuya Nishihara
# Date 1490271428 -32400
# Node ID 2e254165a37cad94746eb0efd4a1e9a480357d45
# Parent  b1528d195a136e613c83f2532d8749c639e48fc0
similar: do not look up and create filectx more than once

Benchmark with 50k added/removed files, on tmpfs:

  $ hg addremove --dry-run --time -q

  previous:   real 16.070 secs (user 14.470+0.000 sys 1.580+0.000)
  this patch: real 12.420 secs (user 11.120+0.000 sys 1.280+0.000)

diff -r b1528d195a13 -r 2e254165a37c mercurial/similar.py
--- a/mercurial/similar.py	Thu Mar 23 21:10:45 2017 +0900
+++ b/mercurial/similar.py	Thu Mar 23 21:17:08 2017 +0900
@@ -93,6 +93,9 @@
         source, bscore = v
         yield source, dest, bscore
 
+def _dropempty(fctxs):
+    return [x for x in fctxs if x.size() > 0]
+
 def findrenames(repo, added, removed, threshold):
     '''find renamed files -- yields (before, after, score) tuples'''
     wctx = repo[None]
@@ -101,10 +104,8 @@
     # Zero length files will be frequently unrelated to each other, and
     # tracking the deletion/addition of such a file will probably cause more
     # harm than good. We strip them out here to avoid matching them later on.
-    addedfiles = [wctx[fp] for fp in sorted(added)
-                  if wctx[fp].size() > 0]
-    removedfiles = [pctx[fp] for fp in sorted(removed)
-                    if fp in pctx and pctx[fp].size() > 0]
+    addedfiles = _dropempty(wctx[fp] for fp in sorted(added))
+    removedfiles = _dropempty(pctx[fp] for fp in sorted(removed) if fp in pctx)
 
     # Find exact matches.
     matchedfiles = set()