comparison contrib/perf.py @ 40550:164b2e77f9a5

perf: introduce a perfrevlogwrite command The command records the time taken by adding many revisions to a revlog, timing each addition individually. The "added revisions" are recreations of the original ones. To time each addition individually, we have to handle the timing and the reporting ourselves. This command is introduced to track the impact of the sparse-revlog format on delta computations at initial storage time. It starts with the full text, a situation similar to a "commit". Additions from an existing delta are better timed with bundles. The complaints from `check-perf-code.py` are not relevant. We are accessing a "revlog" opener, not a repository opener.
author Boris Feld <boris.feld@octobus.net>
date Wed, 03 Oct 2018 10:53:29 +0200
parents cbd251d479bb
children 4dd7edeb3da9
comparison
equal deleted inserted replaced
40549:d6ec45b79277 40550:164b2e77f9a5
22 import contextlib 22 import contextlib
23 import functools 23 import functools
24 import gc 24 import gc
25 import os 25 import os
26 import random 26 import random
27 import shutil
27 import struct 28 import struct
28 import sys 29 import sys
30 import tempfile
29 import threading 31 import threading
30 import time 32 import time
31 from mercurial import ( 33 from mercurial import (
32 changegroup, 34 changegroup,
33 cmdutil, 35 cmdutil,
1563 1565
1564 timer, fm = gettimer(ui, opts) 1566 timer, fm = gettimer(ui, opts)
1565 timer(d) 1567 timer(d)
1566 fm.end() 1568 fm.end()
1567 1569
@command(b'perfrevlogwrite', revlogopts + formatteropts +
         [(b's', b'startrev', 1000, b'revision to start writing at'),
          (b'', b'stoprev', -1, b'last revision to write'),
          (b'', b'count', 3, b'number of passes to perform'),
         ],
         b'-c|-m|FILE')
def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
    """Benchmark writing a series of revisions to a revlog.

    The revisions from ``startrev`` to ``stoprev`` are written ``--count``
    times, and every single addition is timed individually. Negative
    ``startrev``/``stoprev`` values are counted from the end of the revlog.

    The report shows the timings of the revisions found at selected
    positions (min, percentiles, max) once revisions are sorted by their
    median time across runs, followed by the total time of each run.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
    rllen = getlen(ui)(rl)
    # negative revisions are relative to the end of the revlog
    if startrev < 0:
        startrev = rllen + startrev
    if stoprev < 0:
        stoprev = rllen + stoprev

    ### actually gather results
    # bytes key: consistent with the other option/config lookups in this
    # file, and presumably what _byteskwargs produces on Python 3 (on
    # Python 2 it is the same object as the native 'count' key)
    count = opts[b'count']
    if count <= 0:
        raise error.Abort('invalide run count: %d' % count)
    # one list of (rev, timing) per run
    allresults = []
    for c in range(count):
        allresults.append(_timeonewrite(ui, rl, startrev, stoprev, c + 1))

    ### consolidate the results in a single list
    # results holds one (rev, [timing of run 1, timing of run 2, ...]) entry
    # per revision
    results = []
    for idx, (rev, t) in enumerate(allresults[0]):
        ts = [t]
        for other in allresults[1:]:
            orev, ot = other[idx]
            # every run must have processed the same revisions, in order
            assert orev == rev
            ts.append(ot)
        results.append((rev, ts))
    resultcount = len(results)

    ### Compute and display relevant statistics

    # get a formatter
    fm = ui.formatter(b'perf', opts)
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # sorts results by median time
    results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
    # list of (name, index) to display; each index points into the sorted
    # results at the position matching the advertised percentile
    relevants = [
        ("min", 0),
        ("10%", resultcount * 10 // 100),
        ("25%", resultcount * 25 // 100),
        ("50%", resultcount * 50 // 100),
        ("75%", resultcount * 75 // 100),
        ("90%", resultcount * 90 // 100),
        ("95%", resultcount * 95 // 100),
        ("99%", resultcount * 99 // 100),
        ("max", -1),
    ]
    for name, idx in relevants:
        data = results[idx]
        title = '%s of %d, rev %d' % (name, resultcount, data[0])
        formatone(fm, data[1], title=title, displayall=displayall)

    # XXX summing that many float will not be very precise, we ignore this fact
    # for now
    # one entry per run: the per-component sums of the three values found in
    # every timing recorded during that run
    totaltime = []
    for item in allresults:
        totaltime.append(tuple(sum(x[1][i] for x in item) for i in range(3)))
    formatone(fm, totaltime, title="total time (%d revs)" % resultcount,
              displayall=displayall)
    fm.end()
1643
class _faketr(object):
    """Stand-in for a transaction whose ``add`` records nothing.

    An instance is handed to ``addrawrevision`` (through
    ``_getrevisionseed``) where a transaction argument is expected.
    """

    def add(self, x, y, z=None):
        """Accept the arguments and ignore them; always returns None."""
        return
1647
def _timeonewrite(ui, orig, startrev, stoprev, runidx=None):
    """Re-add revisions of ``orig`` to a temporary revlog, timing each one.

    The temporary revlog comes from ``_temprevlog(ui, orig, startrev)``.
    Every revision yielded by ``orig.revs(startrev, stoprev)`` is then added
    to it with ``addrawrevision``, only that call being timed.

    ``runidx``, when provided, is appended to the progress topic.

    Returns a list of ``(rev, timing)`` pairs, in processing order, where
    ``timing`` is the first item of the object bound by ``timeone()``.
    """
    label = 'adding'
    if runidx is not None:
        label = label + ' (run #%d)' % runidx

    faketransaction = _faketr()
    measurements = []
    with _temprevlog(ui, orig, startrev) as target:
        pending = list(orig.revs(startrev, stoprev))
        nbrevs = len(pending)
        for position, rev in enumerate(pending):
            ui.progress(label, position, unit='revs', total=nbrevs)
            # the seed is gathered outside of the timed section
            args, kwargs = _getrevisionseed(orig, rev, faketransaction)
            with timeone() as measure:
                target.addrawrevision(*args, **kwargs)
            measurements.append((rev, measure[0]))
        # report completion, then clear the progress topic
        ui.progress(label, nbrevs, unit='revs', total=nbrevs)
        ui.progress(label, None, unit='revs', total=nbrevs)
    return measurements
1666
def _getrevisionseed(orig, rev, tr):
    """Collect what is needed to re-add revision ``rev`` of ``orig``.

    Returns an ``(args, kwargs)`` pair, used by ``_timeonewrite`` as the
    positional and keyword arguments of ``addrawrevision``:

    - args: ``(text, tr, linkrev, p1, p2)``
    - kwargs: ``node``, ``flags`` and ``cachedelta`` (always None here, no
      existing delta is provided)
    """
    link = orig.linkrev(rev)
    nodeid = orig.node(rev)
    parent1, parent2 = orig.parents(nodeid)
    revflags = orig.flags(rev)
    fulltext = orig.revision(rev)

    positional = (fulltext, tr, link, parent1, parent2)
    keywords = dict(node=nodeid, flags=revflags, cachedelta=None)
    return positional, keywords
1677
@contextlib.contextmanager
def _temprevlog(ui, orig, truncaterev):
    """Context manager providing a truncated, temporary copy of ``orig``.

    The index and data files of ``orig`` are copied into a fresh temporary
    directory and cut back so that ``truncaterev`` and everything after it
    is dropped. A new revlog opened on that copy is yielded.

    The temporary directory is removed when the context exits, whether or
    not an error occurred.

    Raises ``error.Abort`` if ``orig`` or the temporary copy is an inline
    revlog.
    """
    from mercurial import vfs as vfsmod

    if orig._inline:
        raise error.Abort('not supporting inline revlog (yet)')

    origindexpath = orig.opener.join(orig.indexfile)
    origdatapath = orig.opener.join(orig.datafile)
    # single source of truth for the file names: used both for the copy and
    # for instantiating the temporary revlog
    indexname = 'revlog.i'
    dataname = 'revlog.d'

    tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
    try:
        # copy the data file in a temporary directory
        ui.debug('copying data in %s\n' % tmpdir)
        destindexpath = os.path.join(tmpdir, indexname)
        destdatapath = os.path.join(tmpdir, dataname)
        shutil.copyfile(origindexpath, destindexpath)
        shutil.copyfile(origdatapath, destdatapath)

        # remove the data we want to add again
        ui.debug('truncating data to be rewritten\n')
        with open(destindexpath, 'ab') as index:
            index.seek(0)
            # presumably orig._io.size is the size of one index entry, so
            # this keeps the first `truncaterev` entries -- TODO confirm
            index.truncate(truncaterev * orig._io.size)
        with open(destdatapath, 'ab') as data:
            data.seek(0)
            # presumably the offset where the data of `truncaterev` starts
            data.truncate(orig.start(truncaterev))

        # instantiate a new revlog from the temporary copy
        ui.debug('instantiating revlog from the temporary copy\n')
        vfs = vfsmod.vfs(tmpdir)
        vfs.options = getattr(orig.opener, 'options', None)

        dest = revlog.revlog(vfs,
                             indexfile=indexname,
                             datafile=dataname)
        if dest._inline:
            raise error.Abort('not supporting inline revlog (yet)')
        # make sure internals are initialized
        dest.revision(len(dest) - 1)
        yield dest
        del dest, vfs
    finally:
        # second argument is ignore_errors: cleanup is best effort
        shutil.rmtree(tmpdir, True)
1724
1568 @command(b'perfrevlogchunks', revlogopts + formatteropts + 1725 @command(b'perfrevlogchunks', revlogopts + formatteropts +
1569 [(b'e', b'engines', b'', b'compression engines to use'), 1726 [(b'e', b'engines', b'', b'compression engines to use'),
1570 (b's', b'startrev', 0, b'revision to start at')], 1727 (b's', b'startrev', 0, b'revision to start at')],
1571 b'-c|-m|FILE') 1728 b'-c|-m|FILE')
1572 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts): 1729 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):