scmutil: 25% speedup in casecollisionauditor
On a large repository, changing casecollisionauditor to lowercase all file
names at once, rather than one at a time, improves "hg add" time by 25%.
--- a/mercurial/cmdutil.py Tue Jul 17 00:55:22 2012 +0200
+++ b/mercurial/cmdutil.py Fri Jul 06 13:56:40 2012 -0700
@@ -1473,7 +1473,7 @@
cca = None
abort, warn = scmutil.checkportabilityalert(ui)
if abort or warn:
- cca = scmutil.casecollisionauditor(ui, abort, wctx)
+ cca = scmutil.casecollisionauditor(ui, abort, repo.dirstate)
for f in repo.walk(match):
exact = match.exact(f)
if exact or not explicitonly and f not in repo.dirstate:
--- a/mercurial/scmutil.py Tue Jul 17 00:55:22 2012 +0200
+++ b/mercurial/scmutil.py Fri Jul 06 13:56:40 2012 -0700
@@ -49,22 +49,27 @@
return abort, warn
class casecollisionauditor(object):
- def __init__(self, ui, abort, existingiter):
+ def __init__(self, ui, abort, dirstate):
self._ui = ui
self._abort = abort
- self._map = {}
- for f in existingiter:
- self._map[encoding.lower(f)] = f
+ allfiles = '\0'.join(dirstate._map)
+ self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
+ self._dirstate = dirstate
+ # _newfiles records the filenames this object has already checked,
+ # so that calling it twice with the same filename is not reported
+ # as a case collision.
+ self._newfiles = set()
def __call__(self, f):
fl = encoding.lower(f)
- map = self._map
- if fl in map and map[fl] != f:
+ if (fl in self._loweredfiles and f not in self._dirstate and
+ f not in self._newfiles):
msg = _('possible case-folding collision for %s') % f
if self._abort:
raise util.Abort(msg)
self._ui.warn(_("warning: %s\n") % msg)
- map[fl] = f
+ self._loweredfiles.add(fl)
+ self._newfiles.add(f)
class pathauditor(object):
'''ensure that a filesystem path contains no banned components.