streamclone: stop listing files for entries that have no volatile files
This will save a lot of Python-related time.
This significantly boosts performance. The following numbers come from a large
private repository using perf::stream-locked-section:
base-line: 35.04 seconds
prev-change: 24.51 seconds (-30%)
prev-change: 20.88 seconds (-40%)
prev-change: 14.22 seconds (-60%)
this-change: 11.58 seconds (-67% from baseline; -18% from prev)
--- a/mercurial/store.py Tue Mar 26 13:34:05 2024 +0000
+++ b/mercurial/store.py Tue Mar 26 13:46:44 2024 +0000
@@ -484,6 +484,8 @@
This is returned by `store.walk` and represent some data in the store."""
+ maybe_volatile = True
+
def files(self) -> List[StoreFile]:
raise NotImplementedError
@@ -509,6 +511,7 @@
is_revlog = False
+ maybe_volatile = attr.ib()
_entry_path = attr.ib()
_is_volatile = attr.ib(default=False)
_file_size = attr.ib(default=None)
@@ -525,6 +528,7 @@
self._is_volatile = is_volatile
self._file_size = file_size
self._files = None
+ self.maybe_volatile = is_volatile
def files(self) -> List[StoreFile]:
if self._files is None:
@@ -546,6 +550,7 @@
revlog_type = attr.ib(default=None)
target_id = attr.ib(default=None)
+ maybe_volatile = attr.ib(default=True)
_path_prefix = attr.ib(default=None)
_details = attr.ib(default=None)
_files = attr.ib(default=None)
@@ -562,6 +567,12 @@
self.target_id = target_id
self._path_prefix = path_prefix
assert b'.i' in details, (path_prefix, details)
+ for ext in details:
+ if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
+ self.maybe_volatile = True
+ break
+ else:
+ self.maybe_volatile = False
self._details = details
self._files = None
--- a/mercurial/streamclone.py Tue Mar 26 13:34:05 2024 +0000
+++ b/mercurial/streamclone.py Tue Mar 26 13:46:44 2024 +0000
@@ -722,10 +722,12 @@
with TempCopyManager() as copy, progress:
# create a copy of volatile files
for k, vfs, e in entries:
- for f in e.files():
- if f.is_volatile:
- f.file_size(vfs) # record the expected size under lock
- copy(vfs.join(f.unencoded_path))
+ if e.maybe_volatile:
+ for f in e.files():
+ if f.is_volatile:
+ # record the expected size under lock
+ f.file_size(vfs)
+ copy(vfs.join(f.unencoded_path))
# the first yield release the lock on the repository
yield None