contrib/revsetbenchmarks.py
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
Wed, 23 Jul 2014 00:10:24 +0900
branchstable
changeset 21934 0cb34b3991f8
parent 21549 ea3d75ebea6d
child 22059 d5cef58d8ec8
permissions -rwxr-xr-x
largefiles: use "normallookup" on "lfdirstate" while reverting Before this patch, largefiles gotten from revisions other than the parent of the working directory at "hg revert" become "clean" unexpectedly in steps below: 1. "repo.status()" is invoked (for status check before reverting) 1-1 "dirstate" entry for standinfile SF is "normal"-ed (1-2 "lfdirstate" entry of largefile LF (for SF) is "normal"-ed) 2. "cmdutil.revert()" is invoked 2-1 standinfile SF is updated in the working directory 2-2 "dirstate" entry for SF is NOT updated 3. "lfcommands.updatelfiles()" is invoked (by "overrides.overriderevert()") 3-1 largefile LF (for SF) is updated in the working directory 3-2 "dirstate" returns "n" and valid timestamp for SF (by 1-1 and 2-2) 3-3 "lfdirstate" entry for LF is "normal"-ed 3-4 "lfdirstate" is written into ".hg/largefiles/dirstate", and timestamp of LF is stored into "lfdirstate" file (by 3-3) (ASSUMPTION: timestamp of LF differs from one of "lfdirstate" file) Then, "hs status" treats LF as "clean", even though LF is updated by "other" revision (by 3-1), because "lfilesrepo.status()" always treats "normal"-ed files (by 3-3 and 3-4) as "clean". When largefiles are reverted, they should be "normallookup"-ed forcibly. This patch uses "normallookup" on "lfdirstate" while reverting, by passing "True" to newly added argument "normallookup". Forcible "normallookup"-ing is not so expensive, because list of target largefiles is explicitly specified in this case. This patch uses "[debug] dirstate.delaywrite" feature in the test, to ensure that timestamp of the largefile gotten from "other" revision is stored into ".hg/largefiles/dirstate" (for ASSUMPTION at 3-4)

#!/usr/bin/env python

# Measure the performance of a list of revsets against multiple revisions
# defined by parameter. Checkout one by one and run perfrevset with every
# revset in the list to benchmark its performance.
#
# - First argument is a revset of mercurial own repo to runs against.
# - Second argument is the file from which the revset array will be taken
#   If second argument is omitted read it from standard input
#
# You should run this from the root of your mercurial repository.
#
# This script also does one run of the current version of mercurial installed
# to compare performance.

import sys
import os
from subprocess import check_call, Popen, CalledProcessError, STDOUT, PIPE
# cannot use argparse, python 2.7 only
from optparse import OptionParser



def check_output(*args, **kwargs):
    kwargs.setdefault('stderr', PIPE)
    kwargs.setdefault('stdout', PIPE)
    proc = Popen(*args, **kwargs)
    output, error = proc.communicate()
    if proc.returncode != 0:
        raise CalledProcessError(proc.returncode, ' '.join(args[0]))
    return output

def update(rev):
    """update the repo to a revision"""
    try:
        check_call(['hg', 'update', '--quiet', '--check', str(rev)])
    except CalledProcessError, exc:
        print >> sys.stderr, 'update to revision %s failed, aborting' % rev
        sys.exit(exc.returncode)

def perf(revset, target=None):
    """run benchmark for this very revset"""
    try:
        cmd = ['./hg',
               '--config',
               'extensions.perf='
               + os.path.join(contribdir, 'perf.py'),
               'perfrevset',
               revset]
        if target is not None:
            cmd.append('-R')
            cmd.append(target)
        output = check_output(cmd, stderr=STDOUT)
        output = output.lstrip('!') # remove useless ! in this context
        return output.strip()
    except CalledProcessError, exc:
        print >> sys.stderr, 'abort: cannot run revset benchmark'
        sys.exit(exc.returncode)

def printrevision(rev):
    """print data about a revision"""
    sys.stdout.write("Revision: ")
    sys.stdout.flush()
    check_call(['hg', 'log', '--rev', str(rev), '--template',
               '{desc|firstline}\n'])

def getrevs(spec):
    """get the list of rev matched by a revset"""
    try:
        out = check_output(['hg', 'log', '--template={rev}\n', '--rev', spec])
    except CalledProcessError, exc:
        print >> sys.stderr, "abort, can't get revision from %s" % spec
        sys.exit(exc.returncode)
    return [r for r in out.split() if r]


parser = OptionParser(usage="usage: %prog [options] <revs>")
parser.add_option("-f", "--file",
                  help="read revset from FILE", metavar="FILE")
parser.add_option("-R", "--repo",
                  help="run benchmark on REPO", metavar="REPO")

(options, args) = parser.parse_args()

if len(sys.argv) < 2:
    parser.print_help()
    sys.exit(255)

# the directory where both this script and the perf.py extension live.
contribdir = os.path.dirname(__file__)

target_rev = args[0]

revsetsfile = sys.stdin
if options.file:
    revsetsfile = open(options.file)

revsets = [l.strip() for l in revsetsfile]

print "Revsets to benchmark"
print "----------------------------"

for idx, rset in enumerate(revsets):
    print "%i) %s" % (idx, rset)

print "----------------------------"
print


revs = getrevs(target_rev)

results = []
for r in revs:
    print "----------------------------"
    printrevision(r)
    print "----------------------------"
    update(r)
    res = []
    results.append(res)
    for idx, rset in enumerate(revsets):
        data = perf(rset, target=options.repo)
        res.append(data)
        print "%i)" % idx, data
        sys.stdout.flush()
    print "----------------------------"


print """

Result by revset
================
"""

print 'Revision:', revs
for idx, rev in enumerate(revs):
    sys.stdout.write('%i) ' % idx)
    sys.stdout.flush()
    printrevision(rev)

print
print

for ridx, rset in enumerate(revsets):

    print "revset #%i: %s" % (ridx, rset)
    for idx, data in enumerate(results):
        print '%i) %s' % (idx, data[ridx])
    print