view tests/test-parseindex2.py @ 18275:9818f22785b7

performance: speedup computation of unstable revisions In their current state, revset calls can be very costly, as we test predicates on the entire repository. This change drops revset call in favor of direct testing of the phase of changesets. Performance test on my Mercurial checkout - 19857 total changesets, - 1584 obsolete changesets, - 13310 obsolescence markers. Before: ! unstable ! wall 0.017366 After this changes: ! unstable ! wall 0.008093 Performance test on a Mozilla central checkout: - 117293 total changesets, - 1 obsolete changeset, - 1 obsolescence marker. Before: ! unstable ! wall 0.045190 After: ! unstable ! wall 0.000032
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date Fri, 04 Jan 2013 03:15:21 +0100
parents e22d6b1dec1d
children e57c532c3835
line wrap: on
line source

from mercurial import parsers
from mercurial.node import nullid, nullrev
import struct

# This unit test compares the return value of the original Python
# implementation of parseindex and the new C implementation for
# an index file with and without inlined data

# original python implementation
def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

indexformatng = ">Qiiiiii20s12x"

def py_parseindex(data, inline) :
    s = 64
    cache = None
    index = []
    nodemap =  {nullid: nullrev}
    n = off = 0

    l = len(data) - s
    append = index.append
    if inline:
        cache = (0, data)
        while off <= l:
            e = struct.unpack(indexformatng, data[off:off + s])
            nodemap[e[7]] = n
            append(e)
            n += 1
            if e[1] < 0:
                break
            off += e[1] + s
    else:
        while off <= l:
            e = struct.unpack(indexformatng, data[off:off + s])
            nodemap[e[7]] = n
            append(e)
            n += 1
            off += s

    e = list(index[0])
    type = gettype(e[0])
    e[0] = offset_type(0, type)
    index[0] = tuple(e)

    # add the magic null revision at -1
    index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    return index, cache

data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
    '\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff' \
    '\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b' \
    'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
    'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95' \
    '\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@' \
    '\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)' \
    '\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9' \
    '\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W' \
    '\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f' \
    '_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06' \
    '\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6' \
    'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1' \
    '&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa' \
    '\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2' \
    '\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01' \
    '\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06' \
    '\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3' \
    '^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc' \
    'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b' \
    '\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4' \
    '\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9' \
    '\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'

data_non_inlined = '\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' \
    '\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' \
    '\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' \
    '\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
    '\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' \
    '\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' \
    '\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' \
    '\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
    '\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' \
    '\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' \
    '\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' \
    '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' \
    '\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' \
    '\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' \
    '\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' \
    '\x00\x00\x00\x00\x00\x00\x00\x00\x00'

def parse_index2(data, inline):
    index, chunkcache = parsers.parse_index2(data, inline)
    return list(index), chunkcache

def runtest() :
    py_res_1 = py_parseindex(data_inlined, True)
    c_res_1 = parse_index2(data_inlined, True)

    py_res_2 = py_parseindex(data_non_inlined, False)
    c_res_2 = parse_index2(data_non_inlined, False)

    if py_res_1 != c_res_1:
        print "Parse index result (with inlined data) differs!"

    if py_res_2 != c_res_2:
        print "Parse index result (no inlined data) differs!"

    ix = parsers.parse_index2(data_inlined, True)[0]
    for i, r in enumerate(ix):
        if r[7] == nullid:
            i = -1
        try:
            if ix[r[7]] != i:
                print 'Reverse lookup inconsistent for %r' % r[7].encode('hex')
        except TypeError:
            # pure version doesn't support this
            break

    print "done"

runtest()