grep: reduce the cost of pathauditor checks when grepping working copy
Running `time hg grep zxczxczxczxczxc -l` on mozilla-central:
before:
real 0m20,000s
user 0m15,796s
sys 0m4,189s
after:
real 0m10,903s
user 0m8,964s
sys 0m1,916s
if vfs didn't call pathauditor at all:
real 0m7,781s
user 0m5,968s
sys 0m1,790s
Differential Revision: https://phab.mercurial-scm.org/D8582
--- a/mercurial/commands.py Mon May 25 17:32:25 2020 -0400
+++ b/mercurial/commands.py Mon May 25 17:39:23 2020 -0400
@@ -3609,31 +3609,38 @@
parent = pctx.rev()
matches.setdefault(parent, {})
files = revfiles.setdefault(rev, [])
- for fn in fns:
- # fn might not exist in the revision (could be a file removed by the
- # revision). We could check `fn not in ctx` even when rev is None,
- # but it's less racy to protect againt that in readfile.
- if rev is not None and fn not in ctx:
- continue
-
- copy = None
- if follow:
- copy = getrenamed(fn, rev)
- if copy:
- copies.setdefault(rev, {})[fn] = copy
- if fn in skip:
- skip.add(copy)
- if fn in skip:
- continue
- files.append(fn)
-
- if fn not in matches[rev]:
- grepbody(fn, rev, readfile(ctx, fn))
-
- if diff:
- pfn = copy or fn
- if pfn not in matches[parent] and pfn in pctx:
- grepbody(pfn, parent, readfile(pctx, pfn))
+ if rev is None:
+ # in `hg grep pattern`, 2/3 of the time is spent in
+ # pathauditor checks without this in mozilla-central
+ contextmanager = repo.wvfs.audit.cached
+ else:
+ contextmanager = util.nullcontextmanager
+ with contextmanager():
+ for fn in fns:
+ # fn might not exist in the revision (could be a file removed by
+ # the revision). We could check `fn not in ctx` even when rev is
+ # None, but it's less racy to protect against that in readfile.
+ if rev is not None and fn not in ctx:
+ continue
+
+ copy = None
+ if follow:
+ copy = getrenamed(fn, rev)
+ if copy:
+ copies.setdefault(rev, {})[fn] = copy
+ if fn in skip:
+ skip.add(copy)
+ if fn in skip:
+ continue
+ files.append(fn)
+
+ if fn not in matches[rev]:
+ grepbody(fn, rev, readfile(ctx, fn))
+
+ if diff:
+ pfn = copy or fn
+ if pfn not in matches[parent] and pfn in pctx:
+ grepbody(pfn, parent, readfile(pctx, pfn))
ui.pager(b'grep')
fm = ui.formatter(b'grep', opts)
--- a/mercurial/pathutil.py Mon May 25 17:32:25 2020 -0400
+++ b/mercurial/pathutil.py Mon May 25 17:39:23 2020 -0400
@@ -1,5 +1,6 @@
from __future__ import absolute_import
+import contextlib
import errno
import os
import posixpath
@@ -148,6 +149,19 @@
except (OSError, error.Abort):
return False
+ @contextlib.contextmanager
+ def cached(self):
+ if self._cached:
+ yield
+ else:
+ try:
+ self._cached = True
+ yield
+ finally:
+ self.audited.clear()
+ self.auditeddir.clear()
+ self._cached = False
+
def canonpath(root, cwd, myname, auditor=None):
'''return the canonical path of myname, given cwd and root