Mercurial > hg
changeset 7118:619ebf82cef2
Take advantage of fstat call clustering per directory if the OS supports it.
The util module implements two versions of the statfiles function:
_statfiles calls lstat once per file;
_statfiles_clustered takes advantage of optimizations in osutil.c: it stats all
files in a directory at once when a new directory is hit and caches the results.
util.statfiles dispatches to the appropriate version during module loading.
The speedup on a directory tree with 2k directories and 63k files is about a
factor of 1.8 (1.3s -> 0.8s for hg diff; hg startup overhead is about 0.2s).
At this point only Win32 benefits from this patch.
All other OSes use the non-clustered implementation.
author | Petr Kodl <petrkodl@gmail.com> |
---|---|
date | Thu, 09 Oct 2008 10:29:47 -0400 |
parents | ceb8aef03aa7 |
children | 50f4e866d693 |
files | mercurial/dirstate.py mercurial/util.py |
diffstat | 2 files changed, 51 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/dirstate.py Thu Oct 16 17:08:46 2008 +0200 +++ b/mercurial/dirstate.py Thu Oct 09 10:29:47 2008 -0400 @@ -522,17 +522,11 @@ results[nf] = None # step 3: report unseen items in the dmap hash - visit = [f for f in dmap if f not in results and match(f)] - for nf in util.sort(visit): - results[nf] = None - try: - st = lstat(join(nf)) - kind = getkind(st.st_mode) - if kind == regkind or kind == lnkkind: - results[nf] = st - except OSError, inst: - if inst.errno not in (errno.ENOENT, errno.ENOTDIR): - raise + visit = util.sort([f for f in dmap if f not in results and match(f)]) + for nf, st in zip(visit, util.statfiles([join(i) for i in visit])): + if not st is None and not getkind(st.st_mode) in (regkind, lnkkind): + st = None + results[nf] = st del results['.hg'] return results
--- a/mercurial/util.py Thu Oct 16 17:08:46 2008 +0200 +++ b/mercurial/util.py Thu Oct 09 10:29:47 2008 -0400 @@ -826,6 +826,52 @@ '''return true if it is safe to hold open file handles to hardlinks''' return True +def _statfiles(files): + 'Stat each file in files and yield stat or None if file does not exist.' + lstat = os.lstat + for nf in files: + try: + st = lstat(nf) + except OSError, err: + if err.errno not in (errno.ENOENT, errno.ENOTDIR): + raise + st = None + yield st + +def _statfiles_clustered(files): + '''Stat each file in files and yield stat or None if file does not exist. + Cluster and cache stat per directory to minimize number of OS stat calls.''' + lstat = os.lstat + ncase = os.path.normcase + sep = os.sep + dircache = {} # dirname -> filename -> status | None if file does not exist + for nf in files: + nf = ncase(nf) + pos = nf.rfind(sep) + if pos == -1: + dir, base = '.', nf + else: + dir, base = nf[:pos], nf[pos+1:] + cache = dircache.get(dir, None) + if cache is None: + try: + dmap = dict([(ncase(n), s) + for n, k, s in osutil.listdir(dir, True)]) + except OSError, err: + # handle directory not found in Python version prior to 2.5 + # Python <= 2.4 returns native Windows code 3 in errno + # Python >= 2.5 returns ENOENT and adds winerror field + if err.errno not in (3, errno.ENOENT, errno.ENOTDIR): + raise + dmap = {} + cache = dircache.setdefault(dir, dmap) + yield cache.get(base, None) + +if sys.platform == 'win32': + statfiles = _statfiles_clustered +else: + statfiles = _statfiles + getuser_fallback = None def getuser():