merge: skip syntactic path checks in [_checkunknownfile]
author Arseniy Alekseyev <aalekseyev@janestreet.com>
Fri, 06 Jan 2023 18:09:19 +0000
changeset 49982 b7cf91ef03ba
parent 49981 789e152a6bdb
child 49983 7b474609f199
merge: skip syntactic path checks in [_checkunknownfile]

We don't need to check the paths syntactically, since they come from diffing the revisions and so have hopefully already been checked on the way in. We still need to check what's on the filesystem, to avoid traversing the symlinks or subdirs, which we can't know about statically. Also, we use the directory audit to elide [isfileorlink], thus removing ~all lstat calls from hg updates from-empty.
mercurial/merge.py
mercurial/vfs.py
--- a/mercurial/merge.py	Fri Jan 06 16:42:24 2023 +0000
+++ b/mercurial/merge.py	Fri Jan 06 18:09:19 2023 +0000
@@ -46,7 +46,7 @@
     return config
 
 
-def _checkunknownfile(repo, wctx, mctx, f, f2=None):
+def _checkunknownfile(repo, dircache, wctx, mctx, f, f2=None):
     if wctx.isinmemory():
         # Nothing to do in IMM because nothing in the "working copy" can be an
         # unknown file.
@@ -58,8 +58,7 @@
     if f2 is None:
         f2 = f
     return (
-        repo.wvfs.audit.check(f)
-        and repo.wvfs.isfileorlink(f)
+        repo.wvfs.isfileorlink_checkdir(dircache, f)
         and repo.dirstate.normalize(f) not in repo.dirstate
         and mctx[f2].cmp(wctx[f])
     )
@@ -136,6 +135,7 @@
     pathconfig = repo.ui.configbool(
         b'experimental', b'merge.checkpathconflicts'
     )
+    dircache = dict()
     if not force:
 
         def collectconflicts(conflicts, config):
@@ -145,19 +145,18 @@
                 warnconflicts.update(conflicts)
 
         checkunknowndirs = _unknowndirschecker()
-        with repo.wvfs.audit.cached():
-            for f in mresult.files(
-                (
-                    mergestatemod.ACTION_CREATED,
-                    mergestatemod.ACTION_DELETED_CHANGED,
-                )
-            ):
-                if _checkunknownfile(repo, wctx, mctx, f):
-                    fileconflicts.add(f)
-                elif pathconfig and f not in wctx:
-                    path = checkunknowndirs(repo, wctx, f)
-                    if path is not None:
-                        pathconflicts.add(path)
+        for f in mresult.files(
+            (
+                mergestatemod.ACTION_CREATED,
+                mergestatemod.ACTION_DELETED_CHANGED,
+            )
+        ):
+            if _checkunknownfile(repo, dircache, wctx, mctx, f):
+                fileconflicts.add(f)
+            elif pathconfig and f not in wctx:
+                path = checkunknowndirs(repo, wctx, f)
+                if path is not None:
+                    pathconflicts.add(path)
         for f, args, msg in mresult.getactions(
             [mergestatemod.ACTION_LOCAL_DIR_RENAME_GET]
         ):
--- a/mercurial/vfs.py	Fri Jan 06 16:42:24 2023 +0000
+++ b/mercurial/vfs.py	Fri Jan 06 18:09:19 2023 +0000
@@ -422,6 +422,25 @@
                 raise error.Abort(b"%s: %r" % (r, path))
             self.audit(path, mode=mode)
 
+    def isfileorlink_checkdir(
+        self, dircache, path: Optional[bytes] = None
+    ) -> bool:
+        """return True if the path is a regular file or a symlink and
+        the directories along the path are "normal", that is
+        not symlinks or nested hg repositories."""
+        try:
+            for prefix in pathutil.finddirs_rev_noroot(util.localpath(path)):
+                if prefix in dircache:
+                    res = dircache[prefix]
+                else:
+                    res = self.audit._checkfs_exists(prefix, path)
+                    dircache[prefix] = res
+                if not res:
+                    return False
+        except (OSError, error.Abort):
+            return False
+        return self.isfileorlink(path)
+
     def __call__(
         self,
         path: bytes,