perf: add threading capability to perfbdiff
Since we are releasing the GIL during diffing, it is interesting to see how a
thread pool would perform on diffing. We add a new `--threads` argument to
commands. Synchronizing the thread pool is a bit complex because we want to be
able to reuse it from one run to another.
On my computer (i7 with 4 cores + hyperthreading), I get the following data for
about 12000 revisions:
threads wall comb wall gain comb overhead
none 31.596715 31.59 0.00% 0.00%
1 31.621228 31.62 -0.08% 0.09%
2 16.406202 32.8 48.08% 3.83%
3 11.598334 34.76 63.29% 10.03%
4 9.205421 36.77 70.87% 16.40%
5 8.517604 42.51 73.04% 34.57%
6 7.94645 47.58 74.85% 50.62%
7 7.434972 51.92 76.47% 64.36%
8 7.070638 55.34 77.62% 75.18%
Compared to the feature disabled (threads=0), the overhead is negligible with
the threading code (threads=1), and the gain is already 48% with two threads.
--- a/contrib/perf.py Wed Jan 10 17:24:25 2018 +0100
+++ b/contrib/perf.py Sun Dec 17 04:31:27 2017 +0100
@@ -25,7 +25,9 @@
import random
import struct
import sys
+import threading
import time
+import util.queue
from mercurial import (
changegroup,
cmdutil,
@@ -933,11 +935,25 @@
timer(d)
fm.end()
+def _bdiffworker(q, ready, done):
+ while not done.is_set():
+ pair = q.get()
+ while pair is not None:
+ mdiff.textdiff(*pair)
+ q.task_done()
+ pair = q.get()
+ q.task_done() # for the None one
+ with ready:
+ ready.wait()
+
@command('perfbdiff', revlogopts + formatteropts + [
('', 'count', 1, 'number of revisions to test (when using --startrev)'),
- ('', 'alldata', False, 'test bdiffs for all associated revisions')],
+ ('', 'alldata', False, 'test bdiffs for all associated revisions'),
+ ('', 'threads', 0, 'number of thread to use (disable with 0)'),
+ ],
+
'-c|-m|FILE REV')
-def perfbdiff(ui, repo, file_, rev=None, count=None, **opts):
+def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
"""benchmark a bdiff between revisions
By default, benchmark a bdiff between its delta parent and itself.
@@ -983,14 +999,39 @@
dp = r.deltaparent(rev)
textpairs.append((r.revision(dp), r.revision(rev)))
- def d():
- for pair in textpairs:
- mdiff.textdiff(*pair)
-
+ withthreads = threads > 0
+ if not withthreads:
+ def d():
+ for pair in textpairs:
+ mdiff.textdiff(*pair)
+ else:
+ q = util.queue()
+ for i in xrange(threads):
+ q.put(None)
+ ready = threading.Condition()
+ done = threading.Event()
+ for i in xrange(threads):
+ threading.Thread(target=_bdiffworker, args=(q, ready, done)).start()
+ q.join()
+ def d():
+ for pair in textpairs:
+ q.put(pair)
+ for i in xrange(threads):
+ q.put(None)
+ with ready:
+ ready.notify_all()
+ q.join()
timer, fm = gettimer(ui, opts)
timer(d)
fm.end()
+ if withthreads:
+ done.set()
+ for i in xrange(threads):
+ q.put(None)
+ with ready:
+ ready.notify_all()
+
@command('perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
"""Profile diff of working directory changes"""
--- a/tests/test-contrib-perf.t Wed Jan 10 17:24:25 2018 +0100
+++ b/tests/test-contrib-perf.t Sun Dec 17 04:31:27 2017 +0100
@@ -175,7 +175,7 @@
$ (testrepohg files -r 1.2 glob:mercurial/*.c glob:mercurial/*.py;
> testrepohg files -r tip glob:mercurial/*.c glob:mercurial/*.py) |
> "$TESTDIR"/check-perf-code.py contrib/perf.py
- contrib/perf.py:498:
+ contrib/perf.py:\d+: (re)
> from mercurial import (
import newer module separately in try clause for early Mercurial
[1]