Mercurial > hg
changeset 38781:0a57945aaf7f
manifest: persist the manifestfulltext cache
Reconstructing the manifest from the revlog takes time, so much so that there
already is a LRU cache to avoid having to load a manifest multiple times.
This patch persists that LRU cache in the .hg/cache directory, so we can re-use
this cache across hg commands. Commit benchmark (run on Macos 10.13 on a
2017-model Macbook Pro with Core i7 2.9GHz and flash drive), testing without
and with patch run 5 times, baseline is r2a227782e754:
* committing to an existing file, against the mozilla-central repository.
Baseline real time average 1.9692, with patch 1.3786.
A new debugcommand "hg debugmanifestfulltextcache" lets you inspect the cache,
clear it, or add specific manifest nodeids to it. When calling
repo.updatecaches(), the manifest(s) for the working copy parents are added to
the cache.
The hg perfmanifest command has an additional --clear-disk switch to clear this
cache when testing manifest loading performance.
Using this command to test performance on the firefox repository for revision
f947d902ed91, whose manifest has a delta chain length of 60540, we see:
$ hg perfmanifest f947d902ed91 --clear-disk
! wall 0.972253 comb 0.970000 user 0.850000 sys 0.120000 (best of 10)
$ hg debugmanifestfulltextcache -a `hg log --debug -r f947d902ed91 | grep manifest | cut -d: -f3`
Cache contains 1 manifest entries, in order of most to least recent:
id: 0294517df4aad07c70701db43bc7ff24c3ce7dbc, size 25.6 MB
Total cache data size 25.6 MB, on-disk 0 bytes
$ hg perfmanifest f947d902ed91
! wall 0.036748 comb 0.040000 user 0.020000 sys 0.020000 (best of 100)
Worst-case scenario: a manifest text loaded from a single delta; in the firefox
repository manifest node 9a1246ff762e is the chain base for the manifest
attached to revision f947d902ed91. Loading this from a full cache file is just
as fast as without the cache; the extra node ids ensure a big full cache:
$ for node in 9a1246ff762e 1a1922c14a3e 54a31d11a36a 0294517df4aa; do
> hgd debugmanifestfulltextcache -a $node > /dev/null
> done
$ hgd perfmanifest -m 9a1246ff762e
! wall 0.077513 comb 0.080000 user 0.030000 sys 0.050000 (best of 100)
$ hgd perfmanifest -m 9a1246ff762e --clear-disk
! wall 0.078547 comb 0.080000 user 0.070000 sys 0.010000 (best of 100)
author | Martijn Pieters <mj@zopatista.com> |
---|---|
date | Tue, 31 Jul 2018 19:37:54 +0200 |
parents | ddb15a83ae0b |
children | 7eba8f83129b |
files | contrib/perf.py mercurial/debugcommands.py mercurial/localrepo.py mercurial/manifest.py tests/test-clone.t tests/test-completion.t tests/test-debugcommands.t tests/test-fncache.t tests/test-hardlinks.t tests/test-help.t tests/test-inherit-mode.t tests/test-share.t |
diffstat | 12 files changed, 215 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/perf.py Tue Jul 31 19:37:48 2018 +0200 +++ b/contrib/perf.py Tue Jul 31 19:37:54 2018 +0200 @@ -807,7 +807,7 @@ else: t = repo.manifestlog._revlog.lookup(rev) def d(): - repo.manifestlog.clearcaches() + repo.manifestlog.clearcaches(clear_persisted_data=clear_disk) repo.manifestlog[t].read() timer(d) fm.end()
--- a/mercurial/debugcommands.py Tue Jul 31 19:37:48 2018 +0200 +++ b/mercurial/debugcommands.py Tue Jul 31 19:37:54 2018 +0200 @@ -1446,6 +1446,53 @@ return held +@command('debugmanifestfulltextcache', [ + ('', 'clear', False, _('clear the cache')), + ('a', 'add', '', _('add the given manifest node to the cache'), + _('NODE')) + ], '') +def debugmanifestfulltextcache(ui, repo, add=None, **opts): + """show, clear or amend the contents of the manifest fulltext cache""" + with repo.lock(): + r = repo.manifestlog._revlog + try: + cache = r._fulltextcache + except AttributeError: + ui.warn(_( + "Current revlog implementation doesn't appear to have a " + 'manifest fulltext cache\n')) + return + + if opts.get(r'clear'): + cache.clear() + + if add: + try: + manifest = repo.manifestlog[r.lookup(add)] + except error.LookupError as e: + raise error.Abort(e, hint="Check your manifest node id") + manifest.read() # stores revisision in cache too + + if not len(cache): + ui.write(_('Cache empty')) + else: + ui.write( + _('Cache contains %d manifest entries, in order of most to ' + 'least recent:\n') % (len(cache),)) + totalsize = 0 + for nodeid in cache: + # Use cache.get to not update the LRU order + data = cache.get(nodeid) + size = len(data) + totalsize += size + 24 # 20 bytes nodeid, 4 bytes size + ui.write(_('id: %s, size %s\n') % ( + hex(nodeid), util.bytecount(size))) + ondisk = cache._opener.stat('manifestfulltextcache').st_size + ui.write( + _('Total cache data size %s, on-disk %s\n') % ( + util.bytecount(totalsize), util.bytecount(ondisk)) + ) + @command('debugmergestate', [], '') def debugmergestate(ui, repo, *args): """print merge state
--- a/mercurial/localrepo.py Tue Jul 31 19:37:48 2018 +0200 +++ b/mercurial/localrepo.py Tue Jul 31 19:37:54 2018 +0200 @@ -1612,6 +1612,10 @@ rbc.branchinfo(r) rbc.write() + # ensure the working copy parents are in the manifestfulltextcache + for ctx in self['.'].parents(): + ctx.manifest() # accessing the manifest is enough + def invalidatecaches(self): if '_tagscache' in vars(self):
--- a/mercurial/manifest.py Tue Jul 31 19:37:48 2018 +0200 +++ b/mercurial/manifest.py Tue Jul 31 19:37:54 2018 +0200 @@ -10,6 +10,7 @@ import heapq import itertools import struct +import weakref from .i18n import _ from .node import ( @@ -1136,6 +1137,115 @@ for subtree in subm.walksubtrees(matcher=matcher): yield subtree +class manifestfulltextcache(util.lrucachedict): + """File-backed LRU cache for the manifest cache + + File consists of entries, up to EOF: + + - 20 bytes node, 4 bytes length, <length> manifest data + + These are written in reverse cache order (oldest to newest). + + """ + def __init__(self, max): + super(manifestfulltextcache, self).__init__(max) + self._dirty = False + self._read = False + self._opener = None + + def read(self): + if self._read or self._opener is None: + return + + try: + with self._opener('manifestfulltextcache') as fp: + set = super(manifestfulltextcache, self).__setitem__ + # ignore trailing data, this is a cache, corruption is skipped + while True: + node = fp.read(20) + if len(node) < 20: + break + try: + size = struct.unpack('>L', fp.read(4))[0] + except struct.error: + break + value = bytearray(fp.read(size)) + if len(value) != size: + break + set(node, value) + except IOError: + # the file is allowed to be missing + pass + + self._read = True + self._dirty = False + + def write(self): + if not self._dirty or self._opener is None: + return + # rotate backwards to the first used node + with self._opener( + 'manifestfulltextcache', 'w', atomictemp=True, checkambig=True + ) as fp: + node = self._head.prev + while True: + if node.key in self._cache: + fp.write(node.key) + fp.write(struct.pack('>L', len(node.value))) + fp.write(node.value) + if node is self._head: + break + node = node.prev + + def __len__(self): + if not self._read: + self.read() + return super(manifestfulltextcache, self).__len__() + + def __contains__(self, k): + if not self._read: + self.read() + return super(manifestfulltextcache, self).__contains__(k) + + def __iter__(self): + if not self._read: + self.read() + return super(manifestfulltextcache, self).__iter__() + + def __getitem__(self, k): + if not self._read: + self.read() + # the cache lru order can change on read + setdirty = self._cache.get(k) is not self._head + value = super(manifestfulltextcache, self).__getitem__(k) + if setdirty: + self._dirty = True + return value + + def __setitem__(self, k, v): + if not self._read: + self.read() + super(manifestfulltextcache, self).__setitem__(k, v) + self._dirty = True + + def __delitem__(self, k): + if not self._read: + self.read() + super(manifestfulltextcache, self).__delitem__(k) + self._dirty = True + + def get(self, k, default=None): + if not self._read: + self.read() + return super(manifestfulltextcache, self).get(k, default=default) + + def clear(self, clear_persisted_data=False): + super(manifestfulltextcache, self).clear() + if clear_persisted_data: + self._dirty = True + self.write() + self._read = False + class manifestrevlog(revlog.revlog): '''A revlog that stores manifest texts. This is responsible for caching the full-text manifest contents. @@ -1164,7 +1274,7 @@ self._treeondisk = optiontreemanifest or treemanifest - self._fulltextcache = util.lrucachedict(cachesize) + self._fulltextcache = manifestfulltextcache(cachesize) if dir: assert self._treeondisk, 'opts is %r' % opts @@ -1186,13 +1296,35 @@ checkambig=not bool(dir), mmaplargeindex=True) + def _setupmanifestcachehooks(self, repo): + """Persist the manifestfulltextcache on lock release""" + if not util.safehasattr(repo, '_lockref'): + return + + self._fulltextcache._opener = repo.cachevfs + reporef = weakref.ref(repo) + manifestrevlogref = weakref.ref(self) + + def persistmanifestcache(): + repo = reporef() + self = manifestrevlogref() + if repo is None or self is None: + return + if repo.manifestlog._revlog is not self: + # there's a different manifest in play now, abort + return + self._fulltextcache.write() + + if repo._currentlock(repo._lockref) is not None: + repo._afterlock(persistmanifestcache) + @property def fulltextcache(self): return self._fulltextcache - def clearcaches(self): + def clearcaches(self, clear_persisted_data=False): super(manifestrevlog, self).clearcaches() - self._fulltextcache.clear() + self._fulltextcache.clear(clear_persisted_data=clear_persisted_data) self._dirlogcache = {'': self} def dirlog(self, d): @@ -1288,6 +1420,7 @@ self._treeinmem = usetreemanifest self._revlog = repo._constructmanifest() + self._revlog._setupmanifestcachehooks(repo) self._narrowmatch = repo.narrowmatch() # A cache of the manifestctx or treemanifestctx for each directory @@ -1345,9 +1478,9 @@ mancache[node] = m return m - def clearcaches(self): + def clearcaches(self, clear_persisted_data=False): self._dirmancache.clear() - self._revlog.clearcaches() + self._revlog.clearcaches(clear_persisted_data=clear_persisted_data) def rev(self, node): return self._revlog.rev(node) @@ -1421,9 +1554,12 @@ self._data = manifestdict() else: rl = self._revlog() - text = rl.revision(self._node) - arraytext = bytearray(text) - rl._fulltextcache[self._node] = arraytext + if self._node in rl._fulltextcache: + text = pycompat.bytestr(rl._fulltextcache[self._node]) + else: + text = rl.revision(self._node) + arraytext = bytearray(text) + rl._fulltextcache[self._node] = arraytext self._data = manifestdict(text) return self._data @@ -1523,9 +1659,12 @@ m.setnode(self._node) self._data = m else: - text = rl.revision(self._node) - arraytext = bytearray(text) - rl.fulltextcache[self._node] = arraytext + if self._node in rl.fulltextcache: + text = pycompat.bytestr(rl.fulltextcache[self._node]) + else: + text = rl.revision(self._node) + arraytext = bytearray(text) + rl.fulltextcache[self._node] = arraytext self._data = treemanifest(dir=self._dir, text=text) return self._data
--- a/tests/test-clone.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-clone.t Tue Jul 31 19:37:54 2018 +0200 @@ -47,6 +47,7 @@ checklink (symlink !) checklink-target (symlink !) checknoexec (execbit !) + manifestfulltextcache rbc-names-v1 rbc-revs-v1
--- a/tests/test-completion.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-completion.t Tue Jul 31 19:37:54 2018 +0200 @@ -98,6 +98,7 @@ debugknown debuglabelcomplete debuglocks + debugmanifestfulltextcache debugmergestate debugnamecomplete debugobsolete @@ -284,6 +285,7 @@ debugknown: debuglabelcomplete: debuglocks: force-lock, force-wlock, set-lock, set-wlock + debugmanifestfulltextcache: clear, add debugmergestate: debugnamecomplete: debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template
--- a/tests/test-debugcommands.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-debugcommands.t Tue Jul 31 19:37:54 2018 +0200 @@ -411,6 +411,7 @@ $ ls -r .hg/cache/* .hg/cache/rbc-revs-v1 .hg/cache/rbc-names-v1 + .hg/cache/manifestfulltextcache .hg/cache/branch2-served Test debugcolor
--- a/tests/test-fncache.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-fncache.t Tue Jul 31 19:37:54 2018 +0200 @@ -88,6 +88,7 @@ .hg/00manifest.i .hg/cache .hg/cache/branch2-served + .hg/cache/manifestfulltextcache .hg/cache/rbc-names-v1 .hg/cache/rbc-revs-v1 .hg/data @@ -121,6 +122,7 @@ .hg/00changelog.i .hg/cache .hg/cache/branch2-served + .hg/cache/manifestfulltextcache .hg/cache/rbc-names-v1 .hg/cache/rbc-revs-v1 .hg/dirstate
--- a/tests/test-hardlinks.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-hardlinks.t Tue Jul 31 19:37:54 2018 +0200 @@ -241,6 +241,7 @@ 2 r4/.hg/cache/checkisexec (execbit !) ? r4/.hg/cache/checklink-target (glob) (symlink !) 2 r4/.hg/cache/checknoexec (execbit !) + 2 r4/.hg/cache/manifestfulltextcache 2 r4/.hg/cache/rbc-names-v1 2 r4/.hg/cache/rbc-revs-v1 2 r4/.hg/dirstate @@ -291,6 +292,7 @@ 2 r4/.hg/cache/checkisexec (execbit !) 2 r4/.hg/cache/checklink-target (symlink !) 2 r4/.hg/cache/checknoexec (execbit !) + 2 r4/.hg/cache/manifestfulltextcache 2 r4/.hg/cache/rbc-names-v1 2 r4/.hg/cache/rbc-revs-v1 1 r4/.hg/dirstate
--- a/tests/test-help.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-help.t Tue Jul 31 19:37:54 2018 +0200 @@ -966,6 +966,9 @@ debuginstall test Mercurial installation debugknown test whether node ids are known to a repo debuglocks show or modify state of locks + debugmanifestfulltextcache + show, clear or amend the contents of the manifest fulltext + cache debugmergestate print merge state debugnamecomplete
--- a/tests/test-inherit-mode.t Tue Jul 31 19:37:48 2018 +0200 +++ b/tests/test-inherit-mode.t Tue Jul 31 19:37:54 2018 +0200 @@ -69,6 +69,7 @@ 00600 ./.hg/00changelog.i 00770 ./.hg/cache/ 00660 ./.hg/cache/branch2-served + 00660 ./.hg/cache/manifestfulltextcache 00660 ./.hg/cache/rbc-names-v1 00660 ./.hg/cache/rbc-revs-v1 00660 ./.hg/dirstate