Mercurial > hg
comparison contrib/perf.py @ 40550:164b2e77f9a5
perf: introduce a perfrevlogwrite command
The command records the time taken to add many revisions to a revlog, timing
each addition individually. The "added revisions" are recreations of the
original ones.
To time each addition individually, we have to handle the timing and the
reporting ourselves.
This command is introduced to track the impact of the sparse-revlog format on
delta computations at initial storage time. It starts from the full text, a
situation similar to a "commit". Additions from an existing delta are better
timed with bundles.
The complaints from `check-perf-code.py` are not relevant. We are accessing
a "revlog" opener, not a repository opener.
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Wed, 03 Oct 2018 10:53:29 +0200 |
parents | cbd251d479bb |
children | 4dd7edeb3da9 |
comparison
equal
deleted
inserted
replaced
40549:d6ec45b79277 | 40550:164b2e77f9a5 |
---|---|
22 import contextlib | 22 import contextlib |
23 import functools | 23 import functools |
24 import gc | 24 import gc |
25 import os | 25 import os |
26 import random | 26 import random |
27 import shutil | |
27 import struct | 28 import struct |
28 import sys | 29 import sys |
30 import tempfile | |
29 import threading | 31 import threading |
30 import time | 32 import time |
31 from mercurial import ( | 33 from mercurial import ( |
32 changegroup, | 34 changegroup, |
33 cmdutil, | 35 cmdutil, |
1563 | 1565 |
1564 timer, fm = gettimer(ui, opts) | 1566 timer, fm = gettimer(ui, opts) |
1565 timer(d) | 1567 timer(d) |
1566 fm.end() | 1568 fm.end() |
1567 | 1569 |
@command(b'perfrevlogwrite', revlogopts + formatteropts +
         [(b's', b'startrev', 1000, b'revision to start writing at'),
          (b'', b'stoprev', -1, b'last revision to write'),
          (b'', b'count', 3, b'number of passes to perform'),
         ],
         b'-c|-m|FILE')
def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
    """Benchmark writing a series of revisions to a revlog.

    The revisions from ``startrev`` up to ``stoprev`` are re-added to a
    temporary copy of the revlog, and each addition is timed individually.
    The whole pass is repeated ``--count`` times.

    Negative ``startrev``/``stoprev`` values are interpreted relative to the
    length of the revlog.

    The report shows the timings of the revisions found at selected
    percentiles (once revisions are sorted by their median time across
    passes), followed by the total time of each pass.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
    rllen = getlen(ui)(rl)
    if startrev < 0:
        startrev = rllen + startrev
    if stoprev < 0:
        stoprev = rllen + stoprev

    ### actually gather results
    # option keys are bytes once passed through _byteskwargs
    count = opts[b'count']
    if count <= 0:
        raise error.Abort('invalid run count: %d' % count)
    allresults = []
    for c in range(count):
        allresults.append(_timeonewrite(ui, rl, startrev, stoprev, c + 1))

    ### consolidate the results in a single list
    # results[i] == (rev, [timing of pass 1, timing of pass 2, ...])
    results = []
    for idx, (rev, t) in enumerate(allresults[0]):
        ts = [t]
        for other in allresults[1:]:
            orev, ot = other[idx]
            # every pass must have processed the same revisions, in order
            assert orev == rev
            ts.append(ot)
        results.append((rev, ts))
    resultcount = len(results)
    if not resultcount:
        # nothing was written: the percentile lookups below would fail
        raise error.Abort('no revision to write between %d and %d'
                          % (startrev, stoprev))

    ### Compute and display relevant statistics

    # get a formatter
    fm = ui.formatter(b'perf', opts)
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # sorts results by median time
    results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
    # list of (name, index) to display)
    relevants = [
        ("min", 0),
        ("10%", resultcount * 10 // 100),
        ("25%", resultcount * 25 // 100),
        ("50%", resultcount * 50 // 100),
        ("75%", resultcount * 75 // 100),
        ("90%", resultcount * 90 // 100),
        ("95%", resultcount * 95 // 100),
        ("99%", resultcount * 99 // 100),
        ("max", -1),
    ]
    for name, idx in relevants:
        data = results[idx]
        title = '%s of %d, rev %d' % (name, resultcount, data[0])
        formatone(fm, data[1], title=title, displayall=displayall)

    # XXX summing that many float will not be very precise, we ignore this fact
    # for now
    # one entry per pass, summing each of the three components of the
    # per-revision timings
    totaltime = []
    for item in allresults:
        totaltime.append((sum(x[1][0] for x in item),
                          sum(x[1][1] for x in item),
                          sum(x[1][2] for x in item),)
        )
    formatone(fm, totaltime, title="total time (%d revs)" % resultcount,
              displayall=displayall)
    fm.end()
1643 | |
class _faketr(object):
    """Stand-in object handed to the revlog in place of a transaction.

    Its ``add`` method accepts the call and does nothing.
    """

    def add(self, x, y, z=None):
        """Ignore the arguments; always return None."""
        return None
1647 | |
def _timeonewrite(ui, orig, startrev, stoprev, runidx=None):
    """Re-add revisions of `orig` to a temporary revlog, timing each one.

    The temporary revlog comes from `_temprevlog(ui, orig, startrev)`; the
    revisions yielded by `orig.revs(startrev, stoprev)` are then added to it
    one by one. Only the `addrawrevision` call is inside the timed section.

    `runidx`, when not None, is appended to the progress topic.

    Returns a list of `(rev, timing)` pairs in the order the revisions were
    added.
    """
    faketr = _faketr()
    measured = []
    with _temprevlog(ui, orig, startrev) as dest:
        pending = list(orig.revs(startrev, stoprev))
        nbrevs = len(pending)
        if runidx is None:
            label = 'adding'
        else:
            label = 'adding' + ' (run #%d)' % runidx
        for pos, rev in enumerate(pending):
            ui.progress(label, pos, unit='revs', total=nbrevs)
            # fetching the revision content is kept outside the timed section
            args, kwargs = _getrevisionseed(orig, rev, faketr)
            with timeone() as timing:
                dest.addrawrevision(*args, **kwargs)
            measured.append((rev, timing[0]))
        # report completion, then clear the progress topic
        ui.progress(label, nbrevs, unit='revs', total=nbrevs)
        ui.progress(label, None, unit='revs', total=nbrevs)
    return measured
1666 | |
def _getrevisionseed(orig, rev, tr):
    """Build the arguments needed to re-add revision `rev` of `orig`.

    Returns an `(args, kwargs)` pair: `args` is the tuple
    `(text, tr, linkrev, p1, p2)` and `kwargs` carries `node`, `flags` and
    `cachedelta`. `cachedelta` is always None, so only the full text read
    from `orig` is supplied.
    """
    linkrev = orig.linkrev(rev)
    node = orig.node(rev)
    p1, p2 = orig.parents(node)
    flags = orig.flags(rev)
    text = orig.revision(rev)

    args = (text, tr, linkrev, p1, p2)
    kwargs = {
        'node': node,
        'flags': flags,
        'cachedelta': None,
    }
    return args, kwargs
1677 | |
@contextlib.contextmanager
def _temprevlog(ui, orig, truncaterev):
    """Context manager yielding a truncated, temporary copy of `orig`.

    The index and data files of `orig` are copied into a fresh temporary
    directory and cut so that they stop right before `truncaterev`. A new
    revlog is opened on those copies and yielded. The temporary directory is
    removed on exit, whether or not the body raised.

    Raises error.Abort if `orig` (or the truncated copy) is an inline revlog.
    """
    from mercurial import vfs as vfsmod

    if orig._inline:
        raise error.Abort('not supporting inline revlog (yet)')

    origindexpath = orig.opener.join(orig.indexfile)
    origdatapath = orig.opener.join(orig.datafile)
    # names of the copies inside the temporary directory; used both to build
    # the destination paths and to open the new revlog
    indexname = 'revlog.i'
    dataname = 'revlog.d'

    tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
    try:
        # copy the data file in a temporary directory
        ui.debug('copying data in %s\n' % tmpdir)
        destindexpath = os.path.join(tmpdir, indexname)
        destdatapath = os.path.join(tmpdir, dataname)
        shutil.copyfile(origindexpath, destindexpath)
        shutil.copyfile(origdatapath, destdatapath)

        # remove the data we want to add again
        ui.debug('truncating data to be rewritten\n')
        with open(destindexpath, 'ab') as index:
            index.seek(0)
            # keep exactly `truncaterev` index entries
            index.truncate(truncaterev * orig._io.size)
        with open(destdatapath, 'ab') as data:
            data.seek(0)
            # cut at the offset where `truncaterev` starts in the original
            data.truncate(orig.start(truncaterev))

        # instantiate a new revlog from the temporary copy
        ui.debug('instantiating revlog from the temporary copy\n')
        vfs = vfsmod.vfs(tmpdir)
        # reuse the options of the original opener, when it has any
        vfs.options = getattr(orig.opener, 'options', None)

        dest = revlog.revlog(vfs,
                             indexfile=indexname,
                             datafile=dataname)
        if dest._inline:
            raise error.Abort('not supporting inline revlog (yet)')
        # make sure internals are initialized
        dest.revision(len(dest) - 1)
        yield dest
        del dest, vfs
    finally:
        shutil.rmtree(tmpdir, True)
1724 | |
1568 @command(b'perfrevlogchunks', revlogopts + formatteropts + | 1725 @command(b'perfrevlogchunks', revlogopts + formatteropts + |
1569 [(b'e', b'engines', b'', b'compression engines to use'), | 1726 [(b'e', b'engines', b'', b'compression engines to use'), |
1570 (b's', b'startrev', 0, b'revision to start at')], | 1727 (b's', b'startrev', 0, b'revision to start at')], |
1571 b'-c|-m|FILE') | 1728 b'-c|-m|FILE') |
1572 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts): | 1729 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts): |