mercurial/pure/parsers.py
author Martin Geisler <mg@aragost.com>
Mon, 23 Jul 2012 15:55:26 -0600
branch stable
changeset 17236 9fb8312dbdbd
parent 14995 8d928799dab5
child 17425 e95ec38f86b0
permissions -rw-r--r--
encoding: add fast-path for ASCII uppercase. This copies the performance hack from encoding.lower (c481761033bd). The case-folding logic that kicks in on case-insensitive filesystems hits encoding.upper hard: with a repository with 75k files, the timings went from hg perfstatus ! wall 3.156000 comb 3.156250 user 1.625000 sys 1.531250 (best of 3) to hg perfstatus ! wall 2.390000 comb 2.390625 user 1.078125 sys 1.312500 (best of 5) This is a 24% decrease. For comparison, Mercurial 2.0 gives: hg perfstatus ! wall 2.172000 comb 2.171875 user 0.984375 sys 1.187500 (best of 5) so we're only 10% slower than before we added the extra case-folding logic. The same decrease is seen when executing 'hg status' as normal, where we go from: hg status --time time: real 4.322 secs (user 2.219+0.000 sys 2.094+0.000) to hg status --time time: real 3.307 secs (user 1.750+0.000 sys 1.547+0.000)

# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from mercurial.node import bin, nullid
from mercurial import util
import struct, zlib

# Module-level shorthands for the struct/zlib/util functions: going through
# one of these names is a single global lookup instead of a module lookup
# followed by an attribute lookup on every call.
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

def parse_manifest(mfdict, fdict, lines):
    """Populate mfdict and fdict from manifest text.

    Every line of ``lines`` must consist of a name and a value separated
    by exactly one NUL character (anything else makes the tuple unpacking
    fail with ValueError).  The first 40 characters of the value are passed
    through bin() and stored in ``mfdict[name]``; whatever follows them, if
    anything, is stored verbatim in ``fdict[name]``.

    Both dictionaries are updated in place; nothing is returned.
    """
    for entry in lines.splitlines():
        name, value = entry.split('\0')
        # a value of 40 characters or fewer yields an empty suffix, so
        # only longer values produce an fdict entry
        hexpart, suffix = value[:40], value[40:]
        if suffix:
            fdict[name] = suffix
        mfdict[name] = bin(hexpart)

def parse_index2(data, inline):
    """Unpack raw index data into a list of entry tuples.

    ``data`` holds fixed-size records laid out as ">Qiiiiii20s12x" (one
    unsigned 64-bit field, six signed 32-bit fields, a 20-byte string and
    12 bytes of padding, all big-endian).  When ``inline`` is true, each
    record is followed by ``entry[1]`` bytes that are skipped over here
    (presumably the revision data stored alongside the index -- confirm
    against the writer).

    Returns ``(index, cache)``: ``index`` is the list of unpacked tuples
    with one extra sentinel entry appended at the end, and ``cache`` is
    ``(0, data)`` for inline data and None otherwise.

    Raises ValueError('corrupt index file') when parsing does not stop
    exactly at the end of ``data``.
    """
    entryfmt = ">Qiiiiii20s12x"
    entrysize = struct.calcsize(entryfmt)

    entries = []
    cache = None
    pos = 0
    # highest offset at which a complete record still fits
    lastpos = len(data) - entrysize

    if not inline:
        # records are packed back to back
        while pos <= lastpos:
            entries.append(_unpack(entryfmt, data[pos:pos + entrysize]))
            pos += entrysize
    else:
        cache = (0, data)
        while pos <= lastpos:
            entry = _unpack(entryfmt, data[pos:pos + entrysize])
            entries.append(entry)
            chunklen = entry[1]
            if chunklen < 0:
                # leave pos on this record so the length check below
                # rejects the data
                break
            # step over the record and the chunk that follows it
            pos += chunklen + entrysize

    if pos != len(data):
        raise ValueError('corrupt index file')

    if entries:
        # Rewrite the first field of the first entry so that only its low
        # 16 bits survive and everything above them reads as zero.
        # NOTE(review): presumably the upper bits of that field do not hold
        # a real offset for the first entry -- confirm with the index writer.
        first = list(entries[0])
        lowbits = int(first[0] & 0xFFFF)
        first[0] = long(lowbits)
        entries[0] = tuple(first)

    # add the magic null revision at -1
    entries.append((0, 0, 0, -1, -1, -1, -1, nullid))

    return entries, cache

def parse_dirstate(dmap, copymap, st):
    """Fill dmap and copymap from serialized dirstate data.

    ``st`` starts with two 20-byte values, which are returned as a
    two-element list.  The remainder is a sequence of entries, each a
    ">cllll" header (one character plus four big-endian signed 32-bit
    integers) followed by a name whose length is the header's last field.

    For every entry the first four header fields are stored as a tuple in
    ``dmap[name]``.  If the name contains a NUL character it is split there:
    the part before becomes the dmap key and the part after is recorded in
    ``copymap`` under that key.  A name with more than one NUL makes the
    split unpacking raise ValueError.
    """
    parents = [st[:20], st[20: 40]]
    headersize = struct.calcsize(">cllll")
    end = len(st)
    cursor = 40  # entries begin right after the two 20-byte values

    while cursor < end:
        namestart = cursor + headersize
        # a literal format string here is faster than a variable
        header = _unpack(">cllll", st[cursor:namestart])
        cursor = namestart + header[4]
        name = st[namestart:cursor]
        if '\0' in name:
            name, source = name.split('\0')
            copymap[name] = source
        dmap[name] = header[:4]
    return parents