Mercurial: changeset 31582:2e254165a37c

similar: do not look up and create filectx more than once

Benchmark with 50k added/removed files, on tmpfs:

  $ hg addremove --dry-run --time -q

  previous:   real 16.070 secs (user 14.470+0.000 sys 1.580+0.000)
  this patch: real 12.420 secs (user 11.120+0.000 sys 1.280+0.000)

--- a/mercurial/similar.py	Thu Mar 23 21:10:45 2017 +0900
+++ b/mercurial/similar.py	Thu Mar 23 21:17:08 2017 +0900
@@ -93,6 +93,9 @@
         source, bscore = v
         yield source, dest, bscore
 
+def _dropempty(fctxs):
+    return [x for x in fctxs if x.size() > 0]
+
 def findrenames(repo, added, removed, threshold):
     '''find renamed files -- yields (before, after, score) tuples'''
     wctx = repo[None]
@@ -101,10 +104,8 @@
     # Zero length files will be frequently unrelated to each other, and
     # tracking the deletion/addition of such a file will probably cause more
     # harm than good. We strip them out here to avoid matching them later on.
-    addedfiles = [wctx[fp] for fp in sorted(added)
-                  if wctx[fp].size() > 0]
-    removedfiles = [pctx[fp] for fp in sorted(removed)
-                    if fp in pctx and pctx[fp].size() > 0]
+    addedfiles = _dropempty(wctx[fp] for fp in sorted(added))
+    removedfiles = _dropempty(pctx[fp] for fp in sorted(removed) if fp in pctx)
 
     # Find exact matches.
     matchedfiles = set()

author	Yuya Nishihara <yuya@tcha.org>
	Thu, 23 Mar 2017 21:17:08 +0900
changeset 31582	2e254165a37c
parent 31581	b1528d195a13
child 31583	2efd9771323e