fix: prefetch file contents
author Rodrigo Damazio Bovendorp <rdamazio@google.com>
Thu, 09 Jul 2020 20:46:52 -0700
changeset 45074 797ef6f8295e
parent 45073 54009f8c3e25
child 45075 04ef381000a8
fix: prefetch file contents

This prevents the worker subprocesses from contacting the server individually, which is either inefficient, or leads to problems if the connection is shared among them.

Differential Revision: https://phab.mercurial-scm.org/D8723
hgext/fix.py
--- a/hgext/fix.py	Thu Jul 09 20:45:35 2020 -0700
+++ b/hgext/fix.py	Thu Jul 09 20:46:52 2020 -0700
@@ -271,6 +271,11 @@
         basepaths = getbasepaths(repo, opts, workqueue, basectxs)
         fixers = getfixers(ui)
 
+        # Rather than letting each worker independently fetch the files
+        # (which also would add complications for shared/keepalive
+        # connections), prefetch them all first.
+        _prefetchfiles(repo, workqueue, basepaths)
+
         # There are no data dependencies between the workers fixing each file
         # revision, so we can use all available parallelism.
         def getfixes(items):
@@ -630,6 +635,29 @@
     return basectxs
 
 
+def _prefetchfiles(repo, workqueue, basepaths):
+    toprefetch = set()
+
+    # Prefetch the files that will be fixed.
+    for rev, path in workqueue:
+        if rev == wdirrev:
+            continue
+        toprefetch.add((rev, path))
+
+    # Prefetch the base contents for lineranges().
+    for (baserev, fixrev, path), basepath in basepaths.items():
+        toprefetch.add((baserev, basepath))
+
+    if toprefetch:
+        scmutil.prefetchfiles(
+            repo,
+            [
+                (rev, scmutil.matchfiles(repo, [path]))
+                for rev, path in toprefetch
+            ],
+        )
+
+
 def fixfile(ui, repo, opts, fixers, fixctx, path, basepaths, basectxs):
     """Run any configured fixers that should affect the file in this context