Mercurial > hg

--- a/contrib/perf.py	Tue Nov 06 10:41:00 2018 -0500
+++ b/contrib/perf.py	Wed Oct 03 10:53:29 2018 +0200
@@ -24,8 +24,10 @@
 import gc
 import os
 import random
+import shutil
 import struct
 import sys
+import tempfile
 import threading
 import time
 from mercurial import (
@@ -1565,6 +1567,161 @@
     timer(d)
     fm.end()

+@command(b'perfrevlogwrite', revlogopts + formatteropts +
+         [(b's', b'startrev', 1000, b'revision to start writing at'),
+          (b'', b'stoprev', -1, b'last revision to write'),
+          (b'', b'count', 3, b'last revision to write'),
+         ],
+         b'-c|-m|FILE')
+def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
+    """Benchmark writing a series of revisions to a revlog.
+    """
+    opts = _byteskwargs(opts)
+
+    rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
+    rllen = getlen(ui)(rl)
+    if startrev < 0:
+        startrev = rllen + startrev
+    if stoprev < 0:
+        stoprev = rllen + stoprev
+
+    ### actually gather results
+    count = opts['count']
+    if count <= 0:
+        raise error.Abort('invalide run count: %d' % count)
+    allresults = []
+    for c in range(count):
+        allresults.append(_timeonewrite(ui, rl, startrev, stoprev, c + 1))
+
+    ### consolidate the results in a single list
+    results = []
+    for idx, (rev, t) in enumerate(allresults[0]):
+        ts = [t]
+        for other in allresults[1:]:
+            orev, ot = other[idx]
+            assert orev == rev
+            ts.append(ot)
+        results.append((rev, ts))
+    resultcount = len(results)
+
+    ### Compute and display relevant statistics
+
+    # get a formatter
+    fm = ui.formatter(b'perf', opts)
+    displayall = ui.configbool(b"perf", b"all-timing", False)
+
+    # sorts results by median time
+    results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
+    # list of (name, index) to display)
+    relevants = [
+        ("min", 0),
+        ("10%", resultcount * 10 // 100),
+        ("25%", resultcount * 25 // 100),
+        ("50%", resultcount * 70 // 100),
+        ("75%", resultcount * 75 // 100),
+        ("90%", resultcount * 90 // 100),
+        ("95%", resultcount * 95 // 100),
+        ("99%", resultcount * 99 // 100),
+        ("max", -1),
+    ]
+    for name, idx in relevants:
+        data = results[idx]
+        title = '%s of %d, rev %d' % (name, resultcount, data[0])
+        formatone(fm, data[1], title=title, displayall=displayall)
+
+    # XXX summing that many float will not be very precise, we ignore this fact
+    # for now
+    totaltime = []
+    for item in allresults:
+        totaltime.append((sum(x[1][0] for x in item),
+                          sum(x[1][1] for x in item),
+                          sum(x[1][2] for x in item),)
+        )
+    formatone(fm, totaltime, title="total time (%d revs)" % resultcount,
+              displayall=displayall)
+    fm.end()
+
+class _faketr(object):
+    def add(s, x, y, z=None):
+        return None
+
+def _timeonewrite(ui, orig, startrev, stoprev, runidx=None):
+    timings = []
+    tr = _faketr()
+    with _temprevlog(ui, orig, startrev) as dest:
+        revs = list(orig.revs(startrev, stoprev))
+        total = len(revs)
+        topic = 'adding'
+        if runidx is not None:
+            topic += ' (run #%d)' % runidx
+        for idx, rev in enumerate(revs):
+            ui.progress(topic, idx, unit='revs', total=total)
+            addargs, addkwargs = _getrevisionseed(orig, rev, tr)
+            with timeone() as r:
+                dest.addrawrevision(*addargs, **addkwargs)
+            timings.append((rev, r[0]))
+        ui.progress(topic, total, unit='revs', total=total)
+        ui.progress(topic, None, unit='revs', total=total)
+    return timings
+
+def _getrevisionseed(orig, rev, tr):
+    linkrev = orig.linkrev(rev)
+    node = orig.node(rev)
+    p1, p2 = orig.parents(node)
+    flags = orig.flags(rev)
+    cachedelta = None
+    text = orig.revision(rev)
+
+    return ((text, tr, linkrev, p1, p2),
+            {'node': node, 'flags': flags, 'cachedelta': cachedelta})
+
+@contextlib.contextmanager
+def _temprevlog(ui, orig, truncaterev):
+    from mercurial import vfs as vfsmod
+
+    if orig._inline:
+        raise error.Abort('not supporting inline revlog (yet)')
+
+    origindexpath = orig.opener.join(orig.indexfile)
+    origdatapath = orig.opener.join(orig.datafile)
+    indexname = 'revlog.i'
+    dataname = 'revlog.d'
+
+    tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
+    try:
+        # copy the data file in a temporary directory
+        ui.debug('copying data in %s\n' % tmpdir)
+        destindexpath = os.path.join(tmpdir, 'revlog.i')
+        destdatapath = os.path.join(tmpdir, 'revlog.d')
+        shutil.copyfile(origindexpath, destindexpath)
+        shutil.copyfile(origdatapath, destdatapath)
+
+        # remove the data we want to add again
+        ui.debug('truncating data to be rewritten\n')
+        with open(destindexpath, 'ab') as index:
+            index.seek(0)
+            index.truncate(truncaterev * orig._io.size)
+        with open(destdatapath, 'ab') as data:
+            data.seek(0)
+            data.truncate(orig.start(truncaterev))
+
+        # instantiate a new revlog from the temporary copy
+        ui.debug('truncating adding to be rewritten\n')
+        vfs = vfsmod.vfs(tmpdir)
+        vfs.options = getattr(orig.opener, 'options', None)
+
+        dest = revlog.revlog(vfs,
+                             indexfile=indexname,
+                             datafile=dataname)
+        if dest._inline:
+            raise error.Abort('not supporting inline revlog (yet)')
+        # make sure internals are initialized
+        dest.revision(len(dest) - 1)
+        yield dest
+        del dest, vfs
+    finally:
+        shutil.rmtree(tmpdir, True)
+
 @command(b'perfrevlogchunks', revlogopts + formatteropts +
          [(b'e', b'engines', b'', b'compression engines to use'),
           (b's', b'startrev', 0, b'revision to start at')],
--- a/tests/test-contrib-perf.t	Tue Nov 06 10:41:00 2018 -0500
+++ b/tests/test-contrib-perf.t	Wed Oct 03 10:53:29 2018 +0200
@@ -114,6 +114,8 @@
                  Benchmark obtaining a revlog revision.
    perfrevlogrevisions
                  Benchmark reading a series of revisions from a revlog.
+   perfrevlogwrite
+                 Benchmark writing a series of revisions to a revlog.
    perfrevrange  (no help text available)
    perfrevset    benchmark the execution time of a revset
    perfstartup   (no help text available)
@@ -265,4 +267,16 @@
   contrib/perf.py:\d+: (re)
    >     from mercurial import (
    import newer module separately in try clause for early Mercurial
+  contrib/perf.py:\d+: (re)
+   >     origindexpath = orig.opener.join(orig.indexfile)
+   use getvfs()/getsvfs() for early Mercurial
+  contrib/perf.py:\d+: (re)
+   >     origdatapath = orig.opener.join(orig.datafile)
+   use getvfs()/getsvfs() for early Mercurial
+  contrib/perf.py:\d+: (re)
+   >         vfs = vfsmod.vfs(tmpdir)
+   use getvfs()/getsvfs() for early Mercurial
+  contrib/perf.py:\d+: (re)
+   >         vfs.options = getattr(orig.opener, 'options', None)
+   use getvfs()/getsvfs() for early Mercurial
   [1]