changeset 31580:d3e2af4e0128

similar: get rid of quadratic addedfiles.remove() Instead, build a set of files to be removed and recreate addedfiles only if necessary. Benchmark with 50k added/removed files, on tmpfs: $ hg addremove --dry-run --time -q original: real 16.550 secs (user 15.000+0.000 sys 1.540+0.000) previous: real 16.730 secs (user 15.280+0.000 sys 1.440+0.000) this patch: real 16.070 secs (user 14.470+0.000 sys 1.580+0.000)
author Yuya Nishihara <yuya@tcha.org>
date Thu, 23 Mar 2017 20:50:33 +0900
parents 3a383caa97f4
children b1528d195a13
files mercurial/similar.py
diffstat 1 files changed, 4 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/similar.py	Sun Mar 15 18:58:56 2015 +0900
+++ b/mercurial/similar.py	Thu Mar 23 20:50:33 2017 +0900
@@ -107,12 +107,14 @@
                     if fp in parentctx and parentctx[fp].size() > 0]
 
     # Find exact matches.
-    for (a, b) in _findexactmatches(repo, addedfiles[:], removedfiles):
-        addedfiles.remove(b)
+    matchedfiles = set()
+    for (a, b) in _findexactmatches(repo, addedfiles, removedfiles):
+        matchedfiles.add(b)
         yield (a.path(), b.path(), 1.0)
 
     # If the user requested similar files to be matched, search for them also.
     if threshold < 1.0:
+        addedfiles = [x for x in addedfiles if x not in matchedfiles]
         for (a, b, score) in _findsimilarmatches(repo, addedfiles,
                                                  removedfiles, threshold):
             yield (a.path(), b.path(), score)