view mercurial/pure/bdiff.py @ 16719:e7bf09acd410

localrepo: add branchtip() method for faster single-branch lookups For the PyPy repo with 744 branches and 843 branch heads, this brings hg log -r default over NFS from: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 3249 0 1.3222 1.3222 <open> 3244 0 0.6211 0.6211 <method 'close' of 'file' objects> 3243 0 0.0800 0.0800 <method 'read' of 'file' objects> 3241 0 0.0660 0.0660 <method 'seek' of 'file' objects> 3905 0 0.0476 0.0476 <zlib.decompress> 3281 0 2.6756 0.0472 mercurial.changelog:182(read) +3281 0 2.5256 0.0453 +mercurial.revlog:881(revision) +3276 0 0.0389 0.0196 +mercurial.changelog:28(decodeextra) +6562 0 0.0123 0.0123 +<method 'split' of 'str' objects> +6562 0 0.0408 0.0073 +mercurial.encoding:61(tolocal) +3281 0 0.0054 0.0054 +<method 'index' of 'str' objects> 3241 0 2.2464 0.0456 mercurial.revlog:818(_loadchunk) +3241 0 0.6205 0.6205 +<method 'close' of 'file' objects> +3241 0 0.0765 0.0765 +<method 'read' of 'file' objects> +3241 0 0.0660 0.0660 +<method 'seek' of 'file' objects> +3241 0 1.4209 0.0135 +mercurial.store:374(__call__) +3241 0 0.0122 0.0107 +mercurial.revlog:810(_addchunk) 3281 0 2.5256 0.0453 mercurial.revlog:881(revision) +3280 0 0.0175 0.0175 +mercurial.revlog:305(rev) +3281 0 2.2819 0.0119 +mercurial.revlog:847(_chunkraw) +3281 0 0.0603 0.0083 +mercurial.revlog:945(_checkhash) +3281 0 0.0051 0.0051 +mercurial.revlog:349(flags) +3281 0 0.0040 0.0040 +<mercurial.mpatch.patches> 13682 0 0.0479 0.0248 <method 'decode' of 'str' objects> +7418 0 0.0228 0.0076 +encodings.utf_8:15(decode) +1 0 0.0003 0.0000 +encodings:71(search_function) 3248 0 1.3995 0.0246 mercurial.scmutil:218(__call__) +3248 0 1.3222 1.3222 +<open> +3248 0 0.0235 0.0184 +os.path:80(split) +3248 0 0.0084 0.0068 +mercurial.scmutil:92(__call__) Time: real 2.750 secs (user 0.680+0.000 sys 0.360+0.000) down to: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 55 31 0.0197 0.0163 <__import__> +1 0 0.0006 0.0002 +mercurial.context:8(<module>) +1 0 0.0042 0.0001 +mercurial.revlog:12(<module>) +1 0 0.0002 0.0001 +mercurial.match:8(<module>) +1 0 0.0003 0.0001 +mercurial.dirstate:7(<module>) +1 0 0.0057 0.0001 +mercurial.changelog:8(<module>) 1 0 0.0117 0.0032 mercurial.localrepo:525(_readbranchcache) +844 0 0.0015 0.0015 +<binascii.unhexlify> +845 0 0.0010 0.0010 +<method 'split' of 'str' objects> +843 0 0.0045 0.0009 +mercurial.encoding:61(tolocal) +843 0 0.0004 0.0004 +<method 'setdefault' of 'dict' objects> +1 0 0.0003 0.0003 +<method 'close' of 'file' objects> 3 0 0.0029 0.0029 <method 'read' of 'file' objects> 9 0 0.0018 0.0018 <open> 990 0 0.0017 0.0017 <binascii.unhexlify> 53 0 0.0016 0.0016 mercurial.demandimport:43(__init__) 862 0 0.0015 0.0015 <_codecs.utf_8_decode> 862 0 0.0037 0.0014 <method 'decode' of 'str' objects> +862 0 0.0023 0.0008 +encodings.utf_8:15(decode) 981 0 0.0011 0.0011 <method 'split' of 'str' objects> 861 0 0.0046 0.0009 mercurial.encoding:61(tolocal) +861 0 0.0037 0.0014 +<method 'decode' of 'str' objects> 862 0 0.0023 0.0008 encodings.utf_8:15(decode) +862 0 0.0015 0.0015 +<_codecs.utf_8_decode> 4 0 0.0008 0.0008 <method 'close' of 'file' objects> 179 154 0.0202 0.0004 mercurial.demandimport:83(__getattribute__) +36 11 0.0199 0.0003 +mercurial.demandimport:55(_load) +72 0 0.0001 0.0001 +mercurial.demandimport:83(__getattribute__) +36 0 0.0000 0.0000 +<getattr> 1 0 0.0015 0.0004 mercurial.tags:148(_readtagcache) Time: real 0.060 secs (user 0.030+0.000 sys 0.010+0.000)
author Brodie Rao <brodie@sf.io>
date Sun, 13 May 2012 14:04:04 +0200
parents eeac5e179243
children c4e3ff497f89
line wrap: on
line source

# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import struct, difflib, re

def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + '\n' for l in text.split('\n')]
    if lines:
        if lines[-1] == '\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines

def _normalizeblocks(a, b, blocks):
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            while (a1end + shift < a2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        elif b1end == b2:
            while (b1end + shift < b2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift
    r.append(prev)
    return r

def bdiff(a, b):
    a = str(a).splitlines(True)
    b = str(b).splitlines(True)

    if not a:
        s = "".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    bin = []
    p = [0]
    for i in a: p.append(p[-1] + len(i))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        s = "".join(b[lb:bm])
        if am > la or s:
            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return "".join(bin)

def blocks(a, b):
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]

def fixws(text, allws):
    if allws:
        text = re.sub('[ \t\r]+', '', text)
    else:
        text = re.sub('[ \t\r]+', ' ', text)
        text = text.replace(' \n', '\n')
    return text