Mercurial > hg
changeset 51108:0250e45040f1
revlog: add a small cache of unfiltered chunk
This can provide a massive boost to the reading of multiple revisions and the
computation of a valid delta chain.
This greatly helps operations like `hg log --patch`, delta computation (helping
pull/unbundle), and linkrev adjustment (helping copy tracing).
A first round of benchmarks for `hg log --patch --limit 1000` shows improvements
in the 10-20% range on "small" repositories like pypy or mercurial and large
improvements (about 33%) for more complex ones like netbeans and mozilla's.
These speedups are consistent with the improvement to `hg pull` (from a server
sending poor deltas) I saw when benchmarking this last year. Further benchmarks
will be run during the freeze.
I added some configuration in the experimental space to be able to further test
the effect of various tunings for now. This feature should fit well in the
"usage/resource profile" configuration that we should land next cycle.
When it does not provide a benefit, the overhead of the cache seems to be around
2%, a small price for the big improvement. In addition, I believe we could shave
most of this overhead with a more efficient lru implementation.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 27 Oct 2023 08:54:41 +0200 |
parents | c2d2e5b65def |
children | 687e192dae16 |
files | mercurial/configitems.toml mercurial/localrepo.py mercurial/revlog.py |
diffstat | 3 files changed, 93 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/configitems.toml Fri Oct 27 02:57:09 2023 +0200 +++ b/mercurial/configitems.toml Fri Oct 27 08:54:41 2023 +0200 @@ -1111,6 +1111,28 @@ [[items]] section = "experimental" +name = "revlog.uncompressed-cache.enabled" +default = true +experimental = true +documentation = """Enable some caching of uncompressed chunk, greatly boosting +performance at the cost of memory usage.""" + +[[items]] +section = "experimental" +name = "revlog.uncompressed-cache.factor" +default = 4 +experimental = true +documentation = """The size of the cache compared to the largest revision seen.""" + +[[items]] +section = "experimental" +name = "revlog.uncompressed-cache.count" +default = 10000 +experimental = true +documentation = """The number of chunk cached.""" + +[[items]] +section = "experimental" name = "stream-v3" default = false
--- a/mercurial/localrepo.py Fri Oct 27 02:57:09 2023 +0200 +++ b/mercurial/localrepo.py Fri Oct 27 08:54:41 2023 +0200 @@ -1089,6 +1089,16 @@ if chunkcachesize is not None: data_config.chunk_cache_size = chunkcachesize + if ui.configbool(b'experimental', b'revlog.uncompressed-cache.enabled'): + factor = ui.configint( + b'experimental', b'revlog.uncompressed-cache.factor' + ) + count = ui.configint( + b'experimental', b'revlog.uncompressed-cache.count' + ) + data_config.uncompressed_cache_factor = factor + data_config.uncompressed_cache_count = count + delta_config.delta_both_parents = ui.configbool( b'storage', b'revlog.optimize-delta-parent-choice' )
--- a/mercurial/revlog.py Fri Oct 27 02:57:09 2023 +0200 +++ b/mercurial/revlog.py Fri Oct 27 08:54:41 2023 +0200 @@ -295,6 +295,12 @@ # How much data to read and cache into the raw revlog data cache. chunk_cache_size = attr.ib(default=65536) + # The size of the uncompressed cache compared to the largest revision seen. + uncompressed_cache_factor = attr.ib(default=None) + + # The number of chunk cached + uncompressed_cache_count = attr.ib(default=None) + # Allow sparse reading of the revlog data with_sparse_read = attr.ib(default=False) # minimal density of a sparse read chunk @@ -396,6 +402,18 @@ # 3-tuple of (node, rev, text) for a raw revision. self._revisioncache = None + # cache some uncompressed chunks + # rev → uncompressed_chunk + # + # the max cost is dynamically updated to be proportionnal to the + # size of revision we actually encounter. + self._uncompressed_chunk_cache = None + if self.data_config.uncompressed_cache_factor is not None: + self._uncompressed_chunk_cache = util.lrucachedict( + self.data_config.uncompressed_cache_count, + maxcost=65536, # some arbitrary initial value + ) + self._delay_buffer = None @property @@ -414,6 +432,8 @@ def clear_cache(self): assert not self.is_delaying self._revisioncache = None + if self._uncompressed_chunk_cache is not None: + self._uncompressed_chunk_cache.clear() self._segmentfile.clear_cache() self._segmentfile_sidedata.clear_cache() @@ -865,18 +885,26 @@ Returns a str holding uncompressed data for the requested revision. 
""" + if self._uncompressed_chunk_cache is not None: + uncomp = self._uncompressed_chunk_cache.get(rev) + if uncomp is not None: + return uncomp + compression_mode = self.index[rev][10] data = self.get_segment_for_revs(rev, rev)[1] if compression_mode == COMP_MODE_PLAIN: - return data + uncomp = data elif compression_mode == COMP_MODE_DEFAULT: - return self._decompressor(data) + uncomp = self._decompressor(data) elif compression_mode == COMP_MODE_INLINE: - return self.decompress(data) + uncomp = self.decompress(data) else: msg = b'unknown compression mode %d' msg %= compression_mode raise error.RevlogError(msg) + if self._uncompressed_chunk_cache is not None: + self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp)) + return uncomp def _chunks(self, revs, targetsize=None): """Obtain decompressed chunks for the specified revisions. @@ -899,17 +927,30 @@ iosize = self.index.entry_size buffer = util.buffer - l = [] - ladd = l.append + fetched_revs = [] + fadd = fetched_revs.append + chunks = [] ladd = chunks.append - if not self.data_config.with_sparse_read: - slicedchunks = (revs,) + if self._uncompressed_chunk_cache is None: + fetched_revs = revs + else: + for rev in revs: + cached_value = self._uncompressed_chunk_cache.get(rev) + if cached_value is None: + fadd(rev) + else: + ladd((rev, cached_value)) + + if not fetched_revs: + slicedchunks = () + elif not self.data_config.with_sparse_read: + slicedchunks = (fetched_revs,) else: slicedchunks = deltautil.slicechunk( self, - revs, + fetched_revs, targetsize=targetsize, ) @@ -949,7 +990,10 @@ msg %= comp_mode raise error.RevlogError(msg) ladd((rev, c)) - + if self._uncompressed_chunk_cache is not None: + self._uncompressed_chunk_cache.insert(rev, c, len(c)) + + chunks.sort() return [x[1] for x in chunks] def raw_text(self, node, rev): @@ -981,6 +1025,14 @@ if 0 <= rawsize: targetsize = 4 * rawsize + if self._uncompressed_chunk_cache is not None: + # dynamically update the uncompressed_chunk_cache size to 
the + # largest revision we saw in this revlog. + factor = self.data_config.uncompressed_cache_factor + candidate_size = rawsize * factor + if candidate_size > self._uncompressed_chunk_cache.maxcost: + self._uncompressed_chunk_cache.maxcost = candidate_size + bins = self._chunks(chain, targetsize=targetsize) if basetext is None: basetext = bytes(bins[0])