changeset 17201:afd75476939e

scmutil: 25% speedup in casecollisionauditor

On a large repository, switching casecollisionauditor to lowercasing all file names at once rather than one at a time improves hg-add time by 25%.
author Joshua Redstone <joshua.redstone@fb.com>
date Fri, 06 Jul 2012 13:56:40 -0700
parents 19f5dec2d61f
children 1ae119269ddc
files mercurial/cmdutil.py mercurial/scmutil.py
diffstat 2 files changed, 13 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/cmdutil.py	Tue Jul 17 00:55:22 2012 +0200
+++ b/mercurial/cmdutil.py	Fri Jul 06 13:56:40 2012 -0700
@@ -1473,7 +1473,7 @@
     cca = None
     abort, warn = scmutil.checkportabilityalert(ui)
     if abort or warn:
-        cca = scmutil.casecollisionauditor(ui, abort, wctx)
+        cca = scmutil.casecollisionauditor(ui, abort, repo.dirstate)
     for f in repo.walk(match):
         exact = match.exact(f)
         if exact or not explicitonly and f not in repo.dirstate:
--- a/mercurial/scmutil.py	Tue Jul 17 00:55:22 2012 +0200
+++ b/mercurial/scmutil.py	Fri Jul 06 13:56:40 2012 -0700
@@ -49,22 +49,27 @@
     return abort, warn
 
 class casecollisionauditor(object):
-    def __init__(self, ui, abort, existingiter):
+    def __init__(self, ui, abort, dirstate):
         self._ui = ui
         self._abort = abort
-        self._map = {}
-        for f in existingiter:
-            self._map[encoding.lower(f)] = f
+        allfiles = '\0'.join(dirstate._map)
+        self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
+        self._dirstate = dirstate
+        # The purpose of _newfiles is so that we don't complain about
+        # case collisions if someone were to call this object with the
+        # same filename twice.
+        self._newfiles = set()
 
     def __call__(self, f):
         fl = encoding.lower(f)
-        map = self._map
-        if fl in map and map[fl] != f:
+        if (fl in self._loweredfiles and f not in self._dirstate and
+            f not in self._newfiles):
             msg = _('possible case-folding collision for %s') % f
             if self._abort:
                 raise util.Abort(msg)
             self._ui.warn(_("warning: %s\n") % msg)
-        map[fl] = f
+        self._loweredfiles.add(fl)
+        self._newfiles.add(f)
 
 class pathauditor(object):
     '''ensure that a filesystem path contains no banned components.