changeset 51590:49faa72b994e

streamclone: stop listing files for entries that have no volatile files. This saves a lot of Python-related time and significantly boosts performance. The following numbers come from a large private repository using perf::stream-locked-section: base-line: 35.04 seconds; prev-change: 24.51 seconds (-30%); prev-change: 20.88 seconds (-40%); prev-change: 14.22 seconds (-60%); this-change: 11.58 seconds (-67% from baseline; -18% from prev)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 26 Mar 2024 13:46:44 +0000
parents 6e4c8366c5ce
children f28d5d68b71a
files mercurial/store.py mercurial/streamclone.py
diffstat 2 files changed, 17 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/store.py	Tue Mar 26 13:34:05 2024 +0000
+++ b/mercurial/store.py	Tue Mar 26 13:46:44 2024 +0000
@@ -484,6 +484,8 @@
 
     This is returned by `store.walk` and represent some data in the store."""
 
+    maybe_volatile = True
+
     def files(self) -> List[StoreFile]:
         raise NotImplementedError
 
@@ -509,6 +511,7 @@
 
     is_revlog = False
 
+    maybe_volatile = attr.ib()
     _entry_path = attr.ib()
     _is_volatile = attr.ib(default=False)
     _file_size = attr.ib(default=None)
@@ -525,6 +528,7 @@
         self._is_volatile = is_volatile
         self._file_size = file_size
         self._files = None
+        self.maybe_volatile = is_volatile
 
     def files(self) -> List[StoreFile]:
         if self._files is None:
@@ -546,6 +550,7 @@
 
     revlog_type = attr.ib(default=None)
     target_id = attr.ib(default=None)
+    maybe_volatile = attr.ib(default=True)
     _path_prefix = attr.ib(default=None)
     _details = attr.ib(default=None)
     _files = attr.ib(default=None)
@@ -562,6 +567,12 @@
         self.target_id = target_id
         self._path_prefix = path_prefix
         assert b'.i' in details, (path_prefix, details)
+        for ext in details:
+            if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
+                self.maybe_volatile = True
+                break
+        else:
+            self.maybe_volatile = False
         self._details = details
         self._files = None
 
--- a/mercurial/streamclone.py	Tue Mar 26 13:34:05 2024 +0000
+++ b/mercurial/streamclone.py	Tue Mar 26 13:46:44 2024 +0000
@@ -722,10 +722,12 @@
     with TempCopyManager() as copy, progress:
         # create a copy of volatile files
         for k, vfs, e in entries:
-            for f in e.files():
-                if f.is_volatile:
-                    f.file_size(vfs)  # record the expected size under lock
-                    copy(vfs.join(f.unencoded_path))
+            if e.maybe_volatile:
+                for f in e.files():
+                    if f.is_volatile:
+                        # record the expected size under lock
+                        f.file_size(vfs)
+                        copy(vfs.join(f.unencoded_path))
         # the first yield release the lock on the repository
         yield None