Mercurial > hg
changeset 51375:02e7d79edf62
branchmap: use mmap for faster revbranchcache loading
A typical revbranchmap usage is:
- load the entire revbranchmap file into memory
- maybe do a few lookups
- add a few bytes to it
- write the addition to disk
There's no reason to load the entire revbranchmap into memory.
We can split it into a large immutable prefix and a mutable suffix,
and then memory-map the prefix, thus avoiding all the useless loading.
Benchmarking on some real-world pushes suggests that, out of ~100s of server-side
push handling, revbranchcache handling is responsible for:
* ~7s with no change
* ~1.3s with the change, without mmap
* 0.04s with the change, with mmap
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Wed, 10 Jan 2024 18:58:42 +0000 |
parents | 54a75576287a |
children | 0f3a091d887b |
files | mercurial/branchmap.py mercurial/configitems.toml tests/test-branches.t |
diffstat | 3 files changed, 95 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/branchmap.py Fri Feb 02 04:46:54 2024 +0100 +++ b/mercurial/branchmap.py Wed Jan 10 18:58:42 2024 +0000 @@ -621,6 +621,74 @@ _rbccloseflag = 0x80000000 +class rbcrevs: + """a byte string consisting of an immutable prefix followed by a mutable suffix""" + + def __init__(self, revs): + self._prefix = revs + self._rest = bytearray() + + def __len__(self): + return len(self._prefix) + len(self._rest) + + def unpack_record(self, rbcrevidx): + if rbcrevidx < len(self._prefix): + return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx) + else: + return unpack_from( + _rbcrecfmt, + util.buffer(self._rest), + rbcrevidx - len(self._prefix), + ) + + def make_mutable(self): + if len(self._prefix) > 0: + entirety = bytearray() + entirety[:] = self._prefix + entirety.extend(self._rest) + self._rest = entirety + self._prefix = bytearray() + + def truncate(self, pos): + self.make_mutable() + del self._rest[pos:] + + def pack_into(self, rbcrevidx, node, branchidx): + if rbcrevidx < len(self._prefix): + self.make_mutable() + buf = self._rest + start_offset = rbcrevidx - len(self._prefix) + end_offset = start_offset + _rbcrecsize + + if len(self._rest) < end_offset: + # bytearray doesn't allocate extra space at least in Python 3.7. + # When multiple changesets are added in a row, precise resize would + # result in quadratic complexity. Overallocate to compensate by + # using the classic doubling technique for dynamic arrays instead. + # If there was a gap in the map before, less space will be reserved. 
+ self._rest.extend(b'\0' * end_offset) + return pack_into( + _rbcrecfmt, + buf, + start_offset, + node, + branchidx, + ) + + def extend(self, extension): + return self._rest.extend(extension) + + def slice(self, begin, end): + if begin < len(self._prefix): + acc = bytearray() + acc[:] = self._prefix[begin:end] + acc.extend( + self._rest[begin - len(self._prefix) : end - len(self._prefix)] + ) + return acc + return self._rest[begin - len(self._prefix) : end - len(self._prefix)] + + class revbranchcache: """Persistent cache, mapping from revision number to branch name and close. This is a low level cache, independent of filtering. @@ -648,7 +716,7 @@ assert repo.filtername is None self._repo = repo self._names = [] # branch names in local encoding with static index - self._rbcrevs = bytearray() + self._rbcrevs = rbcrevs(bytearray()) self._rbcsnameslen = 0 # length of names read at _rbcsnameslen try: bndata = repo.cachevfs.read(_rbcnames) @@ -664,8 +732,12 @@ if self._names: try: - data = repo.cachevfs.read(_rbcrevs) - self._rbcrevs[:] = data + if repo.ui.configbool(b'format', b'mmap-revbranchcache'): + with repo.cachevfs(_rbcrevs) as fp: + data = util.buffer(util.mmapread(fp)) + else: + data = repo.cachevfs.read(_rbcrevs) + self._rbcrevs = rbcrevs(data) except (IOError, OSError) as inst: repo.ui.debug( b"couldn't read revision branch cache: %s\n" @@ -685,7 +757,7 @@ del self._names[:] self._rbcnamescount = 0 self._rbcrevslen = len(self._repo.changelog) - self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize) + self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize)) util.clearcachedproperty(self, b'_namesreverse') @util.propertycache @@ -708,9 +780,7 @@ # fast path: extract data from cache, use it if node is matching reponode = changelog.node(rev)[:_rbcnodelen] - cachenode, branchidx = unpack_from( - _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx - ) + cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx) close = bool(branchidx & 
_rbccloseflag) if close: branchidx &= _rbcbranchidxmask @@ -733,7 +803,7 @@ b"revision branch cache to revision %d\n" % rev ) truncate = rbcrevidx + _rbcrecsize - del self._rbcrevs[truncate:] + self._rbcrevs.truncate(truncate) self._rbcrevslen = min(self._rbcrevslen, truncate) # fall back to slow path and make sure it will be written to disk @@ -782,16 +852,7 @@ if rev == nullrev: return rbcrevidx = rev * _rbcrecsize - requiredsize = rbcrevidx + _rbcrecsize - rbccur = len(self._rbcrevs) - if rbccur < requiredsize: - # bytearray doesn't allocate extra space at least in Python 3.7. - # When multiple changesets are added in a row, precise resize would - # result in quadratic complexity. Overallocate to compensate by - # use the classic doubling technique for dynamic arrays instead. - # If there was a gap in the map before, less space will be reserved. - self._rbcrevs.extend(b'\0' * max(_rbcmininc, requiredsize)) - pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx) + self._rbcrevs.pack_into(rbcrevidx, node, branchidx) self._rbcrevslen = min(self._rbcrevslen, rev) tr = self._repo.currenttransaction() @@ -866,5 +927,5 @@ f.seek(start) f.truncate() end = revs * _rbcrecsize - f.write(self._rbcrevs[start:end]) + f.write(self._rbcrevs.slice(start, end)) self._rbcrevslen = revs
--- a/mercurial/configitems.toml Fri Feb 02 04:46:54 2024 +0100 +++ b/mercurial/configitems.toml Wed Jan 10 18:58:42 2024 +0000 @@ -2913,3 +2913,8 @@ name = "date-format" default = "" in_core_extension = "blackbox" + +[[items]] +section = "format" +name = "mmap-revbranchcache" +default = false
--- a/tests/test-branches.t Fri Feb 02 04:46:54 2024 +0100 +++ b/tests/test-branches.t Wed Jan 10 18:58:42 2024 +0000 @@ -1,3 +1,12 @@ +#testcases mmap nommap + +#if mmap + $ cat <<EOF >> $HGRCPATH + > [format] + > mmap-revbranchcache=true + > EOF +#endif + $ hg init a $ cd a @@ -921,17 +930,10 @@ $ f --size --hexdump .hg/cache/rbc-* .hg/cache/rbc-names-v1: size=1 0000: 61 |a| - .hg/cache/rbc-revs-v1: size=152 + .hg/cache/rbc-revs-v1: size=48 0000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 0020: 00 00 00 00 00 00 00 00 d8 cb c6 1d 00 00 00 00 |................| - 0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| - 0090: 00 00 00 00 00 00 00 00 |........| $ cd ..