deltas: set estimated compression upper bound to "3x" instead of "10x"
In pratice, we very rarely observer compression better than "3x" on manifest
deltas. Having a more aggressive estimate significantly helps our pathological
use case on a private repository. Here are a comparison of timings using
different upper bound.
Estimated compression | ø | ×10 | ×5 | ×3 |
timing | 14.11 | 2.61 | 1.96 | 1.53 |
We also tested the impact of this series on an array of public repositories.
This shown no impact in either size nor timing.
Full data set below for those interested.
Size
----
Regarding size, not significant impact have been noticed on neither public nor
private repositories. Here are the number we gathered on public repositories:
zlib/upperbound | no | 10x | 5x | 3x
mercurial | 5 875 730 | 5 875 730 | 5 875 730 | 5 875 730
pypy | 27 782 913 | 27 782 913 | 27 782 913 | 27 782 913
netbeans | 159 161 207 | 159 161 207 | 159 161 207 | 159 959 879 (+0.5%)
mozilla-central | 323 841 642 | 323 841 642 | 323 841 642 | 319 867 519 (-2.5%)
mozilla-try | 746 649 123 | 746 649 123 | 746 649 123 | 741 155 568 (-0.7%)
private-repo | 1 485 287 294 | 1 485 287 294 | 1 485 287 294 | 1 409 248 382 (-5.1%)
zstd/upperbound | no | 10x | 5x | 3x
mercurial | 5 895 206 | 5 895 206 | 5 895 206 | 5 895 206
pypy | 28 689 230 | 28 689 230 | 28 689 230 | 28 689 230
netbeans | 157 636 387 | 157 636 387 | 157 636 387 | 159 692 678 (+1.3%)
mozilla-central | 317 650 281 | 317 650 281 | 317 650 281 | 319 613 603 (+0.6%)
mozilla-try | 737 555 275 | 737 555 275 | 737 555 275 | 738 079 473 (+0.1%)
private-repo | 1 352 362 982 | 1 352 362 982 | 1 346 961 880 | 1 361 327 384 (+0.7%)
Speed
------
Timing gathered using `hg perfrevlogwrite -m`. Value are in seconds.
mercurial
zlib | no | 10x | 5x | 3x |
total | 65.551783 | 65.388887 | 65.260658 | 65.321199 |
max | 0.034544 | 0.034571 | 0.034659 | 0.034521 |
99.99% | 0.034544 | 0.034571 | 0.034659 | 0.034521 |
zstd | no | 10x | 5x | 3x |
total | 49.118449 | 49.054062 | 48.753588 | 48.740230 |
max | 0.009338 | 0.009239 | 0.009202 | 0.009178 |
99.99% | 0.007618 | 0.007639 | 0.007626 | 0.007621 |
pypy
zlib | no | 10x | 5x | 3x |
total | 560.865984 | 558.983817 | 559.083815 | 559.349152 |
max | 0.219614 | 0.215922 | 0.218112 | 0.218107 |
99.99% | 0.219614 | 0.215922 | 0.218112 | 0.218107 |
zstd | no | 10x | 5x | 3x |
total | 349.393280 | 347.395819 | 347.185407 | 345.643985 |
max | 0.084143 | 0.083536 | 0.081834 | 0.082178 |
99.99% | 0.039445 | 0.039639 | 0.039612 | 0.039175 |
netbeans
zlib | no | 10x | 5x | 3x |
total | 33103.327727 | 33314.932260 | 33211.745233 | 33345.891778 |
max | 2.666852 | 2.672059 | 2.662453 | 2.662936 |
99.99% | 2.058772 | 2.070429 | 2.069569 | 2.064653 |
zstd | no | 10x | 5x | 3x |
total | 20112.102708 | 20095.879719 | 20083.390300 | 20123.221859 |
max | 2.063482 | 2.062851 | 2.065229 | 2.060147 |
99.99% | 1.146647 | 1.143794 | 1.142933 | 1.146529 |
mozilla
zlib | no | 10x | 5x | 3x |
total | 41374.102138 | 41418.816773 | 41381.956370 | 41334.280732 |
max | 3.383474 | 3.387400 | 3.405711 | 3.387316 |
99.99% | 1.006755 | 1.005954 | 1.007700 | 1.007373 |
zstd | no | 10x | 5x | 3x |
total | 24689.691520 | 24643.939662 | 24664.630027 | 24664.512714 |
max | 1.460822 | 1.449640 | 1.439747 | 1.465304 |
99.99% | 0.527111 | 0.527377 | 0.527807 | 0.527226 |
#!/usr/bin/env python
#
# checkseclevel - checking section title levels in each online help document
from __future__ import absolute_import
import optparse
import os
import sys
# import from the live mercurial repo
os.environ['HGMODULEPOLICY'] = 'py'
sys.path.insert(0, "..")
from mercurial import demandimport; demandimport.enable()
from mercurial import (
commands,
extensions,
help,
minirst,
ui as uimod,
)
table = commands.table
helptable = help.helptable
level2mark = [b'"', b'=', b'-', b'.', b'#']
reservedmarks = [b'"']
mark2level = {}
for m, l in zip(level2mark, range(len(level2mark))):
if m not in reservedmarks:
mark2level[m] = l
initlevel_topic = 0
initlevel_cmd = 1
initlevel_ext = 1
initlevel_ext_cmd = 3
def showavailables(ui, initlevel):
avail = (' available marks and order of them in this help: %s\n') % (
', '.join(['%r' % (m * 4) for m in level2mark[initlevel + 1:]]))
ui.warn(avail.encode('utf-8'))
def checkseclevel(ui, doc, name, initlevel):
ui.note(('checking "%s"\n') % name)
if not isinstance(doc, bytes):
doc = doc.encode('utf-8')
blocks, pruned = minirst.parse(doc, 0, ['verbose'])
errorcnt = 0
curlevel = initlevel
for block in blocks:
if block[b'type'] != b'section':
continue
mark = block[b'underline']
title = block[b'lines'][0]
if (mark not in mark2level) or (mark2level[mark] <= initlevel):
ui.warn((('invalid section mark %r for "%s" of %s\n') %
(mark * 4, title, name)).encode('utf-8'))
showavailables(ui, initlevel)
errorcnt += 1
continue
nextlevel = mark2level[mark]
if curlevel < nextlevel and curlevel + 1 != nextlevel:
ui.warn(('gap of section level at "%s" of %s\n') %
(title, name))
showavailables(ui, initlevel)
errorcnt += 1
continue
ui.note(('appropriate section level for "%s %s"\n') %
(mark * (nextlevel * 2), title))
curlevel = nextlevel
return errorcnt
def checkcmdtable(ui, cmdtable, namefmt, initlevel):
errorcnt = 0
for k, entry in cmdtable.items():
name = k.split(b"|")[0].lstrip(b"^")
if not entry[0].__doc__:
ui.note(('skip checking %s: no help document\n') %
(namefmt % name))
continue
errorcnt += checkseclevel(ui, entry[0].__doc__,
namefmt % name,
initlevel)
return errorcnt
def checkhghelps(ui):
errorcnt = 0
for h in helptable:
names, sec, doc = h[0:3]
if callable(doc):
doc = doc(ui)
errorcnt += checkseclevel(ui, doc,
'%s help topic' % names[0],
initlevel_topic)
errorcnt += checkcmdtable(ui, table, '%s command', initlevel_cmd)
for name in sorted(list(extensions.enabled()) +
list(extensions.disabled())):
mod = extensions.load(ui, name, None)
if not mod.__doc__:
ui.note(('skip checking %s extension: no help document\n') % name)
continue
errorcnt += checkseclevel(ui, mod.__doc__,
'%s extension' % name,
initlevel_ext)
cmdtable = getattr(mod, 'cmdtable', None)
if cmdtable:
errorcnt += checkcmdtable(ui, cmdtable,
'%%s command of %s extension' % name,
initlevel_ext_cmd)
return errorcnt
def checkfile(ui, filename, initlevel):
if filename == '-':
filename = 'stdin'
doc = sys.stdin.read()
else:
with open(filename) as fp:
doc = fp.read()
ui.note(('checking input from %s with initlevel %d\n') %
(filename, initlevel))
return checkseclevel(ui, doc, 'input from %s' % filename, initlevel)
def main():
optparser = optparse.OptionParser("""%prog [options]
This checks all help documents of Mercurial (topics, commands,
extensions and commands of them), if no file is specified by --file
option.
""")
optparser.add_option("-v", "--verbose",
help="enable additional output",
action="store_true")
optparser.add_option("-d", "--debug",
help="debug mode",
action="store_true")
optparser.add_option("-f", "--file",
help="filename to read in (or '-' for stdin)",
action="store", default="")
optparser.add_option("-t", "--topic",
help="parse file as help topic",
action="store_const", dest="initlevel", const=0)
optparser.add_option("-c", "--command",
help="parse file as help of core command",
action="store_const", dest="initlevel", const=1)
optparser.add_option("-e", "--extension",
help="parse file as help of extension",
action="store_const", dest="initlevel", const=1)
optparser.add_option("-C", "--extension-command",
help="parse file as help of extension command",
action="store_const", dest="initlevel", const=3)
optparser.add_option("-l", "--initlevel",
help="set initial section level manually",
action="store", type="int", default=0)
(options, args) = optparser.parse_args()
ui = uimod.ui.load()
ui.setconfig(b'ui', b'verbose', options.verbose, b'--verbose')
ui.setconfig(b'ui', b'debug', options.debug, b'--debug')
if options.file:
if checkfile(ui, options.file, options.initlevel):
sys.exit(1)
else:
if checkhghelps(ui):
sys.exit(1)
if __name__ == "__main__":
main()