upgrade: no longer keep all revlogs in memory at any point
Keeping all objects open is unsustainable, so we will open them on demand. This
means opening them multiple times, but this is the lesser evil.
Each revlog consumes a small amount of memory (index content, associated
nodemap, etc). While there are few "big" revlogs, the sheer number of small
filelogs can become a significant issue memory-wise, consuming multiple GB of
memory. If you combine this extra usage with the use of multiprocessing, this
usage can quickly get out of control. This can effectively block the upgrade of
larger repositories. This changeset fixes this issue.
--- a/mercurial/upgrade_utils/engine.py Wed Nov 02 14:23:09 2022 -0400
+++ b/mercurial/upgrade_utils/engine.py Fri Nov 04 16:15:12 2022 -0400
@@ -233,18 +233,18 @@
# This is for the separate progress bars.
if rl_type & store.FILEFLAGS_CHANGELOG:
- changelogs[unencoded] = (rl_type, rl)
+ changelogs[unencoded] = rl_type
crevcount += len(rl)
csrcsize += datasize
crawsize += rawsize
elif rl_type & store.FILEFLAGS_MANIFESTLOG:
- manifests[unencoded] = (rl_type, rl)
+ manifests[unencoded] = rl_type
mcount += 1
mrevcount += len(rl)
msrcsize += datasize
mrawsize += rawsize
elif rl_type & store.FILEFLAGS_FILELOG:
- filelogs[unencoded] = (rl_type, rl)
+ filelogs[unencoded] = rl_type
fcount += 1
frevcount += len(rl)
fsrcsize += datasize
@@ -289,7 +289,9 @@
)
)
progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
- for unencoded, (rl_type, oldrl) in sorted(filelogs.items()):
+ for unencoded, rl_type in sorted(filelogs.items()):
+ oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
+
newrl = _perform_clone(
ui,
dstrepo,
@@ -329,7 +331,8 @@
progress = srcrepo.ui.makeprogress(
_(b'manifest revisions'), total=mrevcount
)
- for unencoded, (rl_type, oldrl) in sorted(manifests.items()):
+ for unencoded, rl_type in sorted(manifests.items()):
+ oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
newrl = _perform_clone(
ui,
dstrepo,
@@ -368,7 +371,8 @@
progress = srcrepo.ui.makeprogress(
_(b'changelog revisions'), total=crevcount
)
- for unencoded, (rl_type, oldrl) in sorted(changelogs.items()):
+ for unencoded, rl_type in sorted(changelogs.items()):
+ oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
newrl = _perform_clone(
ui,
dstrepo,