Mercurial > hg
changeset 42462:bc4373babd04
revlog: add the option to track the expected compression upper bound
There are various optimizations we can do if we can estimate the size of deltas
before actually spending CPU compressing them. So we add an attribute dedicated
to tracking that.
We only use it on Manifest because (1) its structure is quite stable across all
Mercurial repositories, so its compression ratio is fairly universal, and (2) this
is the revlog with the most extreme deltas (cf. the sparse-revlog optimization).
This will be put to use in later changesets.
Right now the compression upper bound is set to 10. This is a fairly
conservative value (observed value is more around 3), but I prefer to be safe
while introducing the optimization principles. We can tune the optimization
threshold later.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 26 Apr 2019 00:28:22 +0200 |
parents | 74e2f4b609f6 |
children | a0b26fc8fbba |
files | contrib/perf.py mercurial/manifest.py mercurial/revlog.py |
diffstat | 3 files changed, 18 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/perf.py Wed Jun 12 17:30:24 2019 +0100 +++ b/contrib/perf.py Fri Apr 26 00:28:22 2019 +0200 @@ -2277,6 +2277,10 @@ if orig._inline: raise error.Abort('not supporting inline revlog (yet)') + revlogkwargs = {} + k = 'upperboundcomp' + if util.safehasattr(orig, k): + revlogkwargs[k] = getattr(orig, k) origindexpath = orig.opener.join(orig.indexfile) origdatapath = orig.opener.join(orig.datafile) @@ -2308,7 +2312,7 @@ dest = revlog.revlog(vfs, indexfile=indexname, - datafile=dataname) + datafile=dataname, **revlogkwargs) if dest._inline: raise error.Abort('not supporting inline revlog (yet)') # make sure internals are initialized
--- a/mercurial/manifest.py Wed Jun 12 17:30:24 2019 +0100 +++ b/mercurial/manifest.py Fri Apr 26 00:28:22 2019 +0200 @@ -1417,6 +1417,10 @@ self.write() self._read = False +# and upper bound of what we expect from compression +# (real live value seems to be "3") +MAXCOMPRESSION = 10 + @interfaceutil.implementer(repository.imanifeststorage) class manifestrevlog(object): '''A revlog that stores manifest texts. This is responsible for caching the @@ -1467,7 +1471,8 @@ self._revlog = revlog.revlog(opener, indexfile, # only root indexfile is cached checkambig=not bool(tree), - mmaplargeindex=True) + mmaplargeindex=True, + upperboundcomp=MAXCOMPRESSION) self.index = self._revlog.index self.version = self._revlog.version
--- a/mercurial/revlog.py Wed Jun 12 17:30:24 2019 +0100 +++ b/mercurial/revlog.py Fri Apr 26 00:28:22 2019 +0200 @@ -334,15 +334,21 @@ configured threshold. If censorable is True, the revlog can have censored revisions. + + If `upperboundcomp` is not None, this is the expected maximal gain from + compression for the data content. """ def __init__(self, opener, indexfile, datafile=None, checkambig=False, - mmaplargeindex=False, censorable=False): + mmaplargeindex=False, censorable=False, + upperboundcomp=None): """ create a revlog object opener is a function that abstracts the file opening operation and can be used to implement COW semantics or the like. + """ + self.upperboundcomp = upperboundcomp self.indexfile = indexfile self.datafile = datafile or (indexfile[:-2] + ".d") self.opener = opener