transaction: split new files into a separate set
Journal entries with size 0 are common as they represent new revlog
files. Move them from the dictionary into a set as the latter is more
dense. This reduces peak RSS by 70MB for the NetBSD test repository with
around 450k files under .hg/store.
Differential Revision: https://phab.mercurial-scm.org/D9278
--- a/mercurial/repair.py Sat Nov 07 21:34:09 2020 +0100
+++ b/mercurial/repair.py Sat Nov 07 22:31:29 2020 +0100
@@ -210,6 +210,7 @@
# using append-only files. We'll need some kind of storage
# API to handle stripping for us.
oldfiles = set(tr._offsetmap.keys())
+ oldfiles.update(tr._newfiles)
tr.startgroup()
cl.strip(striprev, tr)
--- a/mercurial/transaction.py Sat Nov 07 21:34:09 2020 +0100
+++ b/mercurial/transaction.py Sat Nov 07 22:31:29 2020 +0100
@@ -159,6 +159,7 @@
self._vfsmap = vfsmap
self._after = after
self._offsetmap = {}
+ self._newfiles = set()
self._journal = journalname
self._undoname = undoname
self._queue = []
@@ -248,7 +249,11 @@
@active
def add(self, file, offset):
"""record the state of an append-only file before update"""
- if file in self._offsetmap or file in self._backupmap:
+ if (
+ file in self._newfiles
+ or file in self._offsetmap
+ or file in self._backupmap
+ ):
return
if self._queue:
self._queue[-1].append((file, offset))
@@ -258,9 +263,16 @@
def _addentry(self, file, offset):
"""add a append-only entry to memory and on-disk state"""
- if file in self._offsetmap or file in self._backupmap:
+ if (
+ file in self._newfiles
+ or file in self._offsetmap
+ or file in self._backupmap
+ ):
return
- self._offsetmap[file] = offset
+ if offset:
+ self._offsetmap[file] = offset
+ else:
+ self._newfiles.add(file)
# add enough data to the journal to do the truncate
self._file.write(b"%s\0%d\n" % (file, offset))
self._file.flush()
@@ -280,7 +292,11 @@
msg = b'cannot use transaction.addbackup inside "group"'
raise error.ProgrammingError(msg)
- if file in self._offsetmap or file in self._backupmap:
+ if (
+ file in self._newfiles
+ or file in self._offsetmap
+ or file in self._backupmap
+ ):
return
vfs = self._vfsmap[location]
dirname, filename = vfs.split(file)
@@ -394,6 +410,8 @@
@active
def findoffset(self, file):
+ if file in self._newfiles:
+ return 0
return self._offsetmap.get(file)
@active
@@ -411,10 +429,19 @@
replace can only replace already committed entries
that are not pending in the queue
'''
-
- if file not in self._offsetmap:
+ if file in self._newfiles:
+ if not offset:
+ return
+ self._newfiles.remove(file)
+ self._offsetmap[file] = offset
+ elif file in self._offsetmap:
+ if not offset:
+ del self._offsetmap[file]
+ self._newfiles.add(file)
+ else:
+ self._offsetmap[file] = offset
+ else:
raise KeyError(file)
- self._offsetmap[file] = offset
self._file.write(b"%s\0%d\n" % (file, offset))
self._file.flush()
@@ -555,6 +582,7 @@
b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
)
self._offsetmap = {}
+ self._newfiles = set()
self._writeundo()
if self._after:
self._after()
@@ -638,7 +666,7 @@
self._backupsfile.close()
try:
- if not self._offsetmap and not self._backupentries:
+ if not entries and not self._backupentries:
if self._backupjournal:
self._opener.unlink(self._backupjournal)
if self._journal: