merge: cache unknown dir checks (issue5716) stable
author Mark Thomas <mbthomas@fb.com>
Fri, 24 Nov 2017 12:53:58 -0800
branch stable
changeset 35171 b85962350bb3
parent 35170 c9740b69b9b7
child 35172 a92b9f8e11ba
merge: cache unknown dir checks (issue5716) As mentioned in D1222, the recent pathconflicts change regresses update performance in large repositories when many files are being updated. To mitigate this, we introduce two caches of directories that have already been found to be either: - unknown directories, but which are not aliased by files and so don't need to be checked if they are files again; and - missing directories, which cannot cause path conflicts, and cannot contain a file that causes a path conflict. When checking the paths of a file, testing against these caches means we can skip tests that involve touching the filesystem. Differential Revision: https://phab.mercurial-scm.org/D1224
mercurial/merge.py
--- a/mercurial/merge.py	Thu Nov 23 22:17:03 2017 +0900
+++ b/mercurial/merge.py	Fri Nov 24 12:53:58 2017 -0800
@@ -653,7 +653,7 @@
         and repo.dirstate.normalize(f) not in repo.dirstate
         and mctx[f2].cmp(wctx[f]))
 
-def _checkunknowndirs(repo, f):
+class _unknowndirschecker(object):
     """
     Look for any unknown files or directories that may have a path conflict
     with a file.  If any path prefix of the file exists as a file or link,
@@ -663,23 +663,42 @@
     Returns the shortest path at which a conflict occurs, or None if there is
     no conflict.
     """
+    def __init__(self):
+        # A set of paths known to be good.  This prevents repeated checking of
+        # dirs.  It will be updated with any new dirs that are checked and found
+        # to be safe.
+        self._unknowndircache = set()
 
-    # Check for path prefixes that exist as unknown files.
-    for p in reversed(list(util.finddirs(f))):
-        if (repo.wvfs.audit.check(p)
-                and repo.wvfs.isfileorlink(p)
-                and repo.dirstate.normalize(p) not in repo.dirstate):
-            return p
+        # A set of paths that are known to be absent.  This prevents repeated
+        # checking of subdirectories that are known not to exist. It will be
+        # updated with any new dirs that are checked and found to be absent.
+        self._missingdircache = set()
 
-    # Check if the file conflicts with a directory containing unknown files.
-    if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
-        # Does the directory contain any files that are not in the dirstate?
-        for p, dirs, files in repo.wvfs.walk(f):
-            for fn in files:
-                relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
-                if relf not in repo.dirstate:
-                    return f
-    return None
+    def __call__(self, repo, f):
+        # Check for path prefixes that exist as unknown files.
+        for p in reversed(list(util.finddirs(f))):
+            if p in self._missingdircache:
+                return
+            if p in self._unknowndircache:
+                continue
+            if repo.wvfs.audit.check(p):
+                if (repo.wvfs.isfileorlink(p)
+                        and repo.dirstate.normalize(p) not in repo.dirstate):
+                    return p
+                if not repo.wvfs.lexists(p):
+                    self._missingdircache.add(p)
+                    return
+                self._unknowndircache.add(p)
+
+        # Check if the file conflicts with a directory containing unknown files.
+        if repo.wvfs.audit.check(f) and repo.wvfs.isdir(f):
+            # Does the directory contain any files that are not in the dirstate?
+            for p, dirs, files in repo.wvfs.walk(f):
+                for fn in files:
+                    relf = repo.dirstate.normalize(repo.wvfs.reljoin(p, fn))
+                    if relf not in repo.dirstate:
+                        return f
+        return None
 
 def _checkunknownfiles(repo, wctx, mctx, force, actions, mergeforce):
     """
@@ -701,12 +720,13 @@
             elif config == 'warn':
                 warnconflicts.update(conflicts)
 
+        checkunknowndirs = _unknowndirschecker()
         for f, (m, args, msg) in actions.iteritems():
             if m in ('c', 'dc'):
                 if _checkunknownfile(repo, wctx, mctx, f):
                     fileconflicts.add(f)
                 elif pathconfig and f not in wctx:
-                    path = _checkunknowndirs(repo, f)
+                    path = checkunknowndirs(repo, f)
                     if path is not None:
                         pathconflicts.add(path)
             elif m == 'dg':