perf: add command to benchmark bundle reading
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 13 Nov 2017 19:20:34 -0800
changeset 35132 e96613048bdd
parent 35131 8b1c887d52e7
child 35133 073eec083e25
perf: add command to benchmark bundle reading Upcoming commits will be refactoring bundle2 I/O code. This commit establishes a `hg perfbundleread` command that measures how long it takes to read a bundle using various mechanisms. As a baseline, here's output from an uncompressed bundle1 bundle of my Firefox repo (7,098,622,890 bytes): ! read(8k) ! wall 0.763481 comb 0.760000 user 0.160000 sys 0.600000 (best of 6) ! read(16k) ! wall 0.644512 comb 0.640000 user 0.110000 sys 0.530000 (best of 16) ! read(32k) ! wall 0.581172 comb 0.590000 user 0.060000 sys 0.530000 (best of 18) ! read(128k) ! wall 0.535183 comb 0.530000 user 0.010000 sys 0.520000 (best of 19) ! cg1 deltaiter() ! wall 0.873500 comb 0.880000 user 0.840000 sys 0.040000 (best of 12) ! cg1 getchunks() ! wall 6.283797 comb 6.270000 user 5.570000 sys 0.700000 (best of 3) ! cg1 read(8k) ! wall 1.097173 comb 1.100000 user 0.400000 sys 0.700000 (best of 10) ! cg1 read(16k) ! wall 0.810750 comb 0.800000 user 0.200000 sys 0.600000 (best of 13) ! cg1 read(32k) ! wall 0.671215 comb 0.670000 user 0.110000 sys 0.560000 (best of 15) ! cg1 read(128k) ! wall 0.597857 comb 0.600000 user 0.020000 sys 0.580000 (best of 15) And from an uncompressed bundle2 bundle (6,070,036,163 bytes): ! read(8k) ! wall 0.676997 comb 0.680000 user 0.160000 sys 0.520000 (best of 15) ! read(16k) ! wall 0.592706 comb 0.590000 user 0.080000 sys 0.510000 (best of 17) ! read(32k) ! wall 0.529395 comb 0.530000 user 0.050000 sys 0.480000 (best of 16) ! read(128k) ! wall 0.491270 comb 0.490000 user 0.010000 sys 0.480000 (best of 19) ! bundle2 forwardchunks() ! wall 2.997131 comb 2.990000 user 2.270000 sys 0.720000 (best of 4) ! bundle2 iterparts() ! wall 12.247197 comb 10.670000 user 8.170000 sys 2.500000 (best of 3) ! bundle2 part seek() ! wall 11.761675 comb 10.500000 user 8.240000 sys 2.260000 (best of 3) ! bundle2 part read(8k) ! wall 9.116163 comb 9.110000 user 8.240000 sys 0.870000 (best of 3) ! bundle2 part read(16k) ! 
wall 8.984362 comb 8.970000 user 8.110000 sys 0.860000 (best of 3) ! bundle2 part read(32k) ! wall 8.758364 comb 8.740000 user 7.860000 sys 0.880000 (best of 3) ! bundle2 part read(128k) ! wall 8.749040 comb 8.730000 user 7.830000 sys 0.900000 (best of 3) We already see some interesting data. Notably, bundle2 has significant overhead compared to bundle1. This matters for e.g. stream clone bundles, which can be applied at >1Gbps. Differential Revision: https://phab.mercurial-scm.org/D1385
contrib/perf.py
tests/test-contrib-perf.t
--- a/contrib/perf.py	Mon Nov 20 01:40:26 2017 -0800
+++ b/contrib/perf.py	Mon Nov 13 19:20:34 2017 -0800
@@ -488,6 +488,117 @@
     timer(d)
     fm.end()
 
+@command('perfbundleread', formatteropts, 'BUNDLE')
+def perfbundleread(ui, repo, bundlepath, **opts):
+    """Benchmark reading of bundle files.
+
+    This command is meant to isolate the I/O part of bundle reading as
+    much as possible. BUNDLE is the path of the bundle file to read.
+    """
+    from mercurial import (
+        bundle2,
+        exchange,
+        streamclone,
+    )
+
+    def makebench(fn):  # benchmark calling fn() on a freshly read bundle
+        def run():
+            with open(bundlepath, 'rb') as fh:
+                bundle = exchange.readbundle(ui, fh, bundlepath)
+                fn(bundle)
+
+        return run
+
+    def makereadnbytes(size):  # benchmark bundle.read() in size-byte calls
+        def run():
+            with open(bundlepath, 'rb') as fh:
+                bundle = exchange.readbundle(ui, fh, bundlepath)
+                while bundle.read(size):  # stops at the first falsy read
+                    pass
+
+        return run
+
+    def makestdioread(size):  # baseline: plain file reads, no bundle parsing
+        def run():
+            with open(bundlepath, 'rb') as fh:
+                while fh.read(size):
+                    pass
+
+        return run
+
+    # bundle1: benchmark bodies used for changegroup.cg1unpacker bundles
+
+    def deltaiter(bundle):
+        for delta in bundle.deltaiter():  # drain; deltas are discarded
+            pass
+
+    def iterchunks(bundle):
+        for chunk in bundle.getchunks():  # drain; chunks are discarded
+            pass
+
+    # bundle2: benchmark bodies used for bundle2.unbundle20 bundles
+
+    def forwardchunks(bundle):
+        for chunk in bundle._forwardchunks():  # drain; chunks are discarded
+            pass
+
+    def iterparts(bundle):
+        for part in bundle.iterparts():  # walk the parts without reading them
+            pass
+
+    def seek(bundle):
+        for part in bundle.iterparts():
+            part.seek(0, os.SEEK_END)  # jump to the end of each part
+
+    def makepartreadnbytes(size):  # benchmark part.read() in size-byte calls
+        def run():
+            with open(bundlepath, 'rb') as fh:
+                bundle = exchange.readbundle(ui, fh, bundlepath)
+                for part in bundle.iterparts():
+                    while part.read(size):
+                        pass
+
+        return run
+
+    benches = [  # (callable, title) pairs; raw file reads always run
+        (makestdioread(8192), 'read(8k)'),
+        (makestdioread(16384), 'read(16k)'),
+        (makestdioread(32768), 'read(32k)'),
+        (makestdioread(131072), 'read(128k)'),
+    ]
+
+    with open(bundlepath, 'rb') as fh:  # probe the bundle type once
+        bundle = exchange.readbundle(ui, fh, bundlepath)
+
+        if isinstance(bundle, changegroup.cg1unpacker):
+            benches.extend([
+                (makebench(deltaiter), 'cg1 deltaiter()'),
+                (makebench(iterchunks), 'cg1 getchunks()'),
+                (makereadnbytes(8192), 'cg1 read(8k)'),
+                (makereadnbytes(16384), 'cg1 read(16k)'),
+                (makereadnbytes(32768), 'cg1 read(32k)'),
+                (makereadnbytes(131072), 'cg1 read(128k)'),
+            ])
+        elif isinstance(bundle, bundle2.unbundle20):
+            benches.extend([
+                (makebench(forwardchunks), 'bundle2 forwardchunks()'),
+                (makebench(iterparts), 'bundle2 iterparts()'),
+                (makebench(seek), 'bundle2 part seek()'),
+                (makepartreadnbytes(8192), 'bundle2 part read(8k)'),
+                (makepartreadnbytes(16384), 'bundle2 part read(16k)'),
+                (makepartreadnbytes(32768), 'bundle2 part read(32k)'),
+                (makepartreadnbytes(131072), 'bundle2 part read(128k)'),
+            ])
+        elif isinstance(bundle, streamclone.streamcloneapplier):
+            raise error.Abort('stream clone bundles not supported')
+        else:
+            raise error.Abort('unhandled bundle type: %s' % type(bundle))
+
+    for fn, title in benches:  # each benchmark gets its own timer/formatter
+        timer, fm = gettimer(ui, opts)
+        timer(fn, title=title)
+        fm.end()
+
+
 @command('perfchangegroupchangelog', formatteropts +
          [('', 'version', '02', 'changegroup version'),
           ('r', 'rev', '', 'revisions to add to changegroup')])
--- a/tests/test-contrib-perf.t	Mon Nov 20 01:40:26 2017 -0800
+++ b/tests/test-contrib-perf.t	Mon Nov 13 19:20:34 2017 -0800
@@ -55,6 +55,8 @@
                  benchmark parsing bookmarks from disk to memory
    perfbranchmap
                  benchmark the update of a branchmap
+   perfbundleread
+                 Benchmark reading of bundle files.
    perfcca       (no help text available)
    perfchangegroupchangelog
                  Benchmark producing a changelog group for a changegroup.
@@ -173,3 +175,7 @@
   $ (testrepohg files -r 1.2 glob:mercurial/*.c glob:mercurial/*.py;
   >  testrepohg files -r tip glob:mercurial/*.c glob:mercurial/*.py) |
   > "$TESTDIR"/check-perf-code.py contrib/perf.py
+  contrib/perf.py:498:
+   >     from mercurial import (
+   import newer module separately in try clause for early Mercurial
+  [1]