# HG changeset patch # User Boris Feld # Date 1538556809 -7200 # Node ID 164b2e77f9a58e89907db8b1170c0d7af66f8e75 # Parent d6ec45b79277eeefea92374ddf497ed1f296a486 perf: introduce a perfrevlogwrite command The command record times taken by adding many revisions to a revlog. Timing each addition, individually. The "added revision" are recreations of the original ones. To time each addition individually, we have to handle the timing and the reporting ourselves. This command is introduced to track the impact of sparse-revlog format on delta computations at initial storage time. It starts with the full text, a situation similar to the "commit". Additions from an existing delta are better timed with bundles. The complaints from `check-perf-code.py` are not relevant. We are accessing and "revlog" opener, not a repository opener. diff -r d6ec45b79277 -r 164b2e77f9a5 contrib/perf.py --- a/contrib/perf.py Tue Nov 06 10:41:00 2018 -0500 +++ b/contrib/perf.py Wed Oct 03 10:53:29 2018 +0200 @@ -24,8 +24,10 @@ import gc import os import random +import shutil import struct import sys +import tempfile import threading import time from mercurial import ( @@ -1565,6 +1567,161 @@ timer(d) fm.end() +@command(b'perfrevlogwrite', revlogopts + formatteropts + + [(b's', b'startrev', 1000, b'revision to start writing at'), + (b'', b'stoprev', -1, b'last revision to write'), + (b'', b'count', 3, b'last revision to write'), + ], + b'-c|-m|FILE') +def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts): + """Benchmark writing a series of revisions to a revlog. + """ + opts = _byteskwargs(opts) + + rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts) + rllen = getlen(ui)(rl) + if startrev < 0: + startrev = rllen + startrev + if stoprev < 0: + stoprev = rllen + stoprev + + ### actually gather results + count = opts['count'] + if count <= 0: + raise error.Abort('invalide run count: %d' % count) + allresults = [] + for c in range(count): + allresults.append(_timeonewrite(ui, rl, startrev, stoprev, c + 1)) + + ### consolidate the results in a single list + results = [] + for idx, (rev, t) in enumerate(allresults[0]): + ts = [t] + for other in allresults[1:]: + orev, ot = other[idx] + assert orev == rev + ts.append(ot) + results.append((rev, ts)) + resultcount = len(results) + + ### Compute and display relevant statistics + + # get a formatter + fm = ui.formatter(b'perf', opts) + displayall = ui.configbool(b"perf", b"all-timing", False) + + # sorts results by median time + results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2]) + # list of (name, index) to display) + relevants = [ + ("min", 0), + ("10%", resultcount * 10 // 100), + ("25%", resultcount * 25 // 100), + ("50%", resultcount * 70 // 100), + ("75%", resultcount * 75 // 100), + ("90%", resultcount * 90 // 100), + ("95%", resultcount * 95 // 100), + ("99%", resultcount * 99 // 100), + ("max", -1), + ] + for name, idx in relevants: + data = results[idx] + title = '%s of %d, rev %d' % (name, resultcount, data[0]) + formatone(fm, data[1], title=title, displayall=displayall) + + # XXX summing that many float will not be very precise, we ignore this fact + # for now + totaltime = [] + for item in allresults: + totaltime.append((sum(x[1][0] for x in item), + sum(x[1][1] for x in item), + sum(x[1][2] for x in item),) + ) + formatone(fm, totaltime, title="total time (%d revs)" % resultcount, + displayall=displayall) + fm.end() + +class _faketr(object): + def add(s, x, y, z=None): + return None + +def _timeonewrite(ui, orig, startrev, stoprev, runidx=None): + timings = [] + tr = _faketr() + with _temprevlog(ui, orig, startrev) as dest: + revs = list(orig.revs(startrev, stoprev)) + total = len(revs) + topic = 'adding' + if runidx is not None: + topic += ' (run #%d)' % runidx + for idx, rev in enumerate(revs): + ui.progress(topic, idx, unit='revs', total=total) + addargs, addkwargs = _getrevisionseed(orig, rev, tr) + with timeone() as r: + dest.addrawrevision(*addargs, **addkwargs) + timings.append((rev, r[0])) + ui.progress(topic, total, unit='revs', total=total) + ui.progress(topic, None, unit='revs', total=total) + return timings + +def _getrevisionseed(orig, rev, tr): + linkrev = orig.linkrev(rev) + node = orig.node(rev) + p1, p2 = orig.parents(node) + flags = orig.flags(rev) + cachedelta = None + text = orig.revision(rev) + + return ((text, tr, linkrev, p1, p2), + {'node': node, 'flags': flags, 'cachedelta': cachedelta}) + +@contextlib.contextmanager +def _temprevlog(ui, orig, truncaterev): + from mercurial import vfs as vfsmod + + if orig._inline: + raise error.Abort('not supporting inline revlog (yet)') + + origindexpath = orig.opener.join(orig.indexfile) + origdatapath = orig.opener.join(orig.datafile) + indexname = 'revlog.i' + dataname = 'revlog.d' + + tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-') + try: + # copy the data file in a temporary directory + ui.debug('copying data in %s\n' % tmpdir) + destindexpath = os.path.join(tmpdir, 'revlog.i') + destdatapath = os.path.join(tmpdir, 'revlog.d') + shutil.copyfile(origindexpath, destindexpath) + shutil.copyfile(origdatapath, destdatapath) + + # remove the data we want to add again + ui.debug('truncating data to be rewritten\n') + with open(destindexpath, 'ab') as index: + index.seek(0) + index.truncate(truncaterev * orig._io.size) + with open(destdatapath, 'ab') as data: + data.seek(0) + data.truncate(orig.start(truncaterev)) + + # instantiate a new revlog from the temporary copy + ui.debug('truncating adding to be rewritten\n') + vfs = vfsmod.vfs(tmpdir) + vfs.options = getattr(orig.opener, 'options', None) + + dest = revlog.revlog(vfs, + indexfile=indexname, + datafile=dataname) + if dest._inline: + raise error.Abort('not supporting inline revlog (yet)') + # make sure internals are initialized + dest.revision(len(dest) - 1) + yield dest + del dest, vfs + finally: + shutil.rmtree(tmpdir, True) + @command(b'perfrevlogchunks', revlogopts + formatteropts + [(b'e', b'engines', b'', b'compression engines to use'), (b's', b'startrev', 0, b'revision to start at')], diff -r d6ec45b79277 -r 164b2e77f9a5 tests/test-contrib-perf.t --- a/tests/test-contrib-perf.t Tue Nov 06 10:41:00 2018 -0500 +++ b/tests/test-contrib-perf.t Wed Oct 03 10:53:29 2018 +0200 @@ -114,6 +114,8 @@ Benchmark obtaining a revlog revision. perfrevlogrevisions Benchmark reading a series of revisions from a revlog. + perfrevlogwrite + Benchmark writing a series of revisions to a revlog. perfrevrange (no help text available) perfrevset benchmark the execution time of a revset perfstartup (no help text available) @@ -265,4 +267,16 @@ contrib/perf.py:\d+: (re) > from mercurial import ( import newer module separately in try clause for early Mercurial + contrib/perf.py:\d+: (re) + > origindexpath = orig.opener.join(orig.indexfile) + use getvfs()/getsvfs() for early Mercurial + contrib/perf.py:\d+: (re) + > origdatapath = orig.opener.join(orig.datafile) + use getvfs()/getsvfs() for early Mercurial + contrib/perf.py:\d+: (re) + > vfs = vfsmod.vfs(tmpdir) + use getvfs()/getsvfs() for early Mercurial + contrib/perf.py:\d+: (re) + > vfs.options = getattr(orig.opener, 'options', None) + use getvfs()/getsvfs() for early Mercurial [1]