perf: benchmark command for revlog indexes
authorGregory Szorc <gregory.szorc@gmail.com>
Sun, 28 May 2017 11:13:10 -0700
changeset 32565 e4f514627514
parent 32564 7236facefd4f
child 32566 d68f3d6bc214
perf: benchmark command for revlog indexes We didn't have explicit microbenchmark coverage for loading revlog indexes. That seems like a useful thing to have, so let's add it. We currently measure the low-level nodemap APIs. There is room to hook in at the actual revlog layer. This could be done as a follow-up. The hackiest thing about this patch is specifying revlog paths. Other commands have arguments that allow resolution of changelog, manifest, and filelog. I needed to hook in at a lower level of the revlog API than what the existing helper functions to resolve revlogs allowed. I was too lazy to write some new APIs. This could be done as a follow-up easily enough. Example output for `hg perfrevlogindex 00changelog.i` on my Firefox repo (404418 revisions): ! revlog constructor ! wall 0.003106 comb 0.000000 user 0.000000 sys 0.000000 (best of 912) ! read ! wall 0.003077 comb 0.000000 user 0.000000 sys 0.000000 (best of 924) ! create index object ! wall 0.000000 comb 0.000000 user 0.000000 sys 0.000000 (best of 1803994) ! retrieve index entry for rev 0 ! wall 0.000193 comb 0.000000 user 0.000000 sys 0.000000 (best of 14037) ! look up missing node ! wall 0.003313 comb 0.000000 user 0.000000 sys 0.000000 (best of 865) ! look up node at rev 0 ! wall 0.003295 comb 0.010000 user 0.010000 sys 0.000000 (best of 858) ! look up node at 1/4 len ! wall 0.002598 comb 0.010000 user 0.010000 sys 0.000000 (best of 1103) ! look up node at 1/2 len ! wall 0.001909 comb 0.000000 user 0.000000 sys 0.000000 (best of 1507) ! look up node at 3/4 len ! wall 0.001213 comb 0.000000 user 0.000000 sys 0.000000 (best of 2275) ! look up node at tip ! wall 0.000453 comb 0.000000 user 0.000000 sys 0.000000 (best of 5697) ! look up all nodes (forward) ! wall 0.094615 comb 0.100000 user 0.100000 sys 0.000000 (best of 100) ! look up all nodes (reverse) ! wall 0.045889 comb 0.050000 user 0.050000 sys 0.000000 (best of 100) ! retrieve all index entries (forward) ! wall 0.078398 comb 0.080000 user 0.060000 sys 0.020000 (best of 100) ! retrieve all index entries (reverse) ! wall 0.079376 comb 0.080000 user 0.070000 sys 0.010000 (best of 100)
contrib/perf.py
tests/test-contrib-perf.t
--- a/contrib/perf.py	Sun May 28 10:56:28 2017 -0700
+++ b/contrib/perf.py	Sun May 28 11:13:10 2017 -0700
@@ -23,6 +23,7 @@
 import gc
 import os
 import random
+import struct
 import sys
 import time
 from mercurial import (
@@ -34,6 +35,7 @@
     extensions,
     mdiff,
     merge,
+    revlog,
     util,
 )
 
@@ -857,6 +859,122 @@
         timer(d, title)
     fm.end()
 
+@command('perfrevlogindex', revlogopts + formatteropts,
+         '-c|-m|FILE')
+def perfrevlogindex(ui, repo, file_=None, **opts):
+    """Benchmark operations against a revlog index.
+
+    This tests constructing a revlog instance, reading index data,
+    parsing index data, and performing various operations related to
+    index data.
+    """
+
+    rl = cmdutil.openrevlog(repo, 'perfrevlogindex', file_, opts)
+
+    opener = getattr(rl, 'opener')  # trick linter
+    indexfile = rl.indexfile
+    data = opener.read(indexfile)
+
+    header = struct.unpack('>I', data[0:4])[0]
+    version = header & 0xFFFF
+    if version == 1:
+        revlogio = revlog.revlogio()
+        inline = header & (1 << 16)
+    else:
+        raise error.Abort(('unsupported revlog version: %d') % version)
+
+    rllen = len(rl)
+
+    node0 = rl.node(0)
+    node25 = rl.node(rllen // 4)
+    node50 = rl.node(rllen // 2)
+    node75 = rl.node(rllen // 4 * 3)
+    node100 = rl.node(rllen - 1)
+
+    allrevs = range(rllen)
+    allrevsrev = list(reversed(allrevs))
+    allnodes = [rl.node(rev) for rev in range(rllen)]
+    allnodesrev = list(reversed(allnodes))
+
+    def constructor():
+        revlog.revlog(opener, indexfile)
+
+    def read():
+        with opener(indexfile) as fh:
+            fh.read()
+
+    def parseindex():
+        revlogio.parseindex(data, inline)
+
+    def getentry(revornode):
+        index = revlogio.parseindex(data, inline)[0]
+        index[revornode]
+
+    def getentries(revs, count=1):
+        index = revlogio.parseindex(data, inline)[0]
+
+        for i in range(count):
+            for rev in revs:
+                index[rev]
+
+    def resolvenode(node):
+        nodemap = revlogio.parseindex(data, inline)[1]
+        # This only works for the C code.
+        if nodemap is None:
+            return
+
+        try:
+            nodemap[node]
+        except error.RevlogError:
+            pass
+
+    def resolvenodes(nodes, count=1):
+        nodemap = revlogio.parseindex(data, inline)[1]
+        if nodemap is None:
+            return
+
+        for i in range(count):
+            for node in nodes:
+                try:
+                    nodemap[node]
+                except error.RevlogError:
+                    pass
+
+    benches = [
+        (constructor, 'revlog constructor'),
+        (read, 'read'),
+        (parseindex, 'create index object'),
+        (lambda: getentry(0), 'retrieve index entry for rev 0'),
+        (lambda: resolvenode('a' * 20), 'look up missing node'),
+        (lambda: resolvenode(node0), 'look up node at rev 0'),
+        (lambda: resolvenode(node25), 'look up node at 1/4 len'),
+        (lambda: resolvenode(node50), 'look up node at 1/2 len'),
+        (lambda: resolvenode(node75), 'look up node at 3/4 len'),
+        (lambda: resolvenode(node100), 'look up node at tip'),
+        # 2x variation is to measure caching impact.
+        (lambda: resolvenodes(allnodes),
+         'look up all nodes (forward)'),
+        (lambda: resolvenodes(allnodes, 2),
+         'look up all nodes 2x (forward)'),
+        (lambda: resolvenodes(allnodesrev),
+         'look up all nodes (reverse)'),
+        (lambda: resolvenodes(allnodesrev, 2),
+         'look up all nodes 2x (reverse)'),
+        (lambda: getentries(allrevs),
+         'retrieve all index entries (forward)'),
+        (lambda: getentries(allrevs, 2),
+         'retrieve all index entries 2x (forward)'),
+        (lambda: getentries(allrevsrev),
+         'retrieve all index entries (reverse)'),
+        (lambda: getentries(allrevsrev, 2),
+         'retrieve all index entries 2x (reverse)'),
+    ]
+
+    for fn, title in benches:
+        timer, fm = gettimer(ui, opts)
+        timer(fn, title=title)
+        fm.end()
+
 @command('perfrevlogrevisions', revlogopts + formatteropts +
          [('d', 'dist', 100, 'distance between the revisions'),
           ('s', 'startrev', 0, 'revision to start reading at'),
--- a/tests/test-contrib-perf.t	Sun May 28 10:56:28 2017 -0700
+++ b/tests/test-contrib-perf.t	Sun May 28 11:13:10 2017 -0700
@@ -97,6 +97,8 @@
    perfrawfiles  (no help text available)
    perfrevlogchunks
                  Benchmark operations on revlog chunks.
+   perfrevlogindex
+                 Benchmark operations against a revlog index.
    perfrevlogrevision
                  Benchmark obtaining a revlog revision.
    perfrevlogrevisions
@@ -147,6 +149,7 @@
   $ hg perfnodelookup 2
   $ hg perfpathcopies 1 2
   $ hg perfrawfiles 2
+  $ hg perfrevlogindex -c
   $ hg perfrevlogrevisions .hg/store/data/a.i
   $ hg perfrevlogrevision -m 0
   $ hg perfrevlogchunks -c