stream-clone: disable gc for `_entries_walk` duration
The number of small containers created sends Python into a gc frenzy that
seriously impacts performance.
This significantly boosts performance. The following numbers come from a large
private repository, measured using perf::stream-locked-section:
base-line: 35.04 seconds
prev-change: 24.51 seconds (-30%)
this-change: 20.88 seconds (-40% from baseline; -15% from previous changes)
--- a/mercurial/streamclone.py Tue Mar 26 13:28:52 2024 +0000
+++ b/mercurial/streamclone.py Tue Mar 26 13:32:46 2024 +0000
@@ -770,23 +770,26 @@
matcher = narrowspec.match(repo.root, includes, excludes)
phase = not repo.publishing()
- entries = _walkstreamfiles(
- repo,
- matcher,
- phase=phase,
- obsolescence=includeobsmarkers,
- )
- for entry in entries:
- yield (_srcstore, entry)
+ # Python's gc goes crazy over all the small containers we create; disabling
+ # the gc while we do so helps performance a lot.
+ with util.nogc():
+ entries = _walkstreamfiles(
+ repo,
+ matcher,
+ phase=phase,
+ obsolescence=includeobsmarkers,
+ )
+ for entry in entries:
+ yield (_srcstore, entry)
- for name in cacheutil.cachetocopy(repo):
- if repo.cachevfs.exists(name):
- # not really a StoreEntry, but close enough
- entry = store.SimpleStoreEntry(
- entry_path=name,
- is_volatile=True,
- )
- yield (_srccache, entry)
+ for name in cacheutil.cachetocopy(repo):
+ if repo.cachevfs.exists(name):
+ # not really a StoreEntry, but close enough
+ entry = store.SimpleStoreEntry(
+ entry_path=name,
+ is_volatile=True,
+ )
+ yield (_srccache, entry)
def generatev2(repo, includes, excludes, includeobsmarkers):