mercurial/pure/bdiff.py
author Joerg Sonnenberger <joerg@bec.de>
Wed, 06 Dec 2017 15:46:41 +0100
changeset 35318 d13526333835
parent 34436 5326e4ef1dab
child 36184 29dd37a418aa
permissions -rw-r--r--
phases: drop the list with phase of each rev, always comput phase sets Change the C implementation of phasecache.loadphaserevs to provide only the sets for draft and secret phase as well as the number of revisions seen. Change the pure Python implementation of the same functino to compute the sets instead of the list of phases for each revision. Change phasecache.phase to check the phase sets and assume public if the revision is in neither draft nor secret set. This is computationally slightly more expensive. Change phasecache.getrevset for public() based queries to compute the set of non-matching revisions and return the result as filtered fullreposet. A shortcut is taken when no draft or secret revision exists. Bump the module version for the changed interface contract. Overall, this saves around 16 Bytes per revision whenever the phasecache is used, for the test case in issue5691 it is around 3MB. getrevset() for a large repository is around 13% slower here, that seems an acceptable trade off. Performance impact for phase() should be similar. Differential Revision: https://phab.mercurial-scm.org/D1606

# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import difflib
import re
import struct

def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + '\n' for l in text.split('\n')]
    if lines:
        if lines[-1] == '\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines

def _normalizeblocks(a, b, blocks):
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            while (a1end + shift < a2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        elif b1end == b2:
            while (b1end + shift < b2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift
    r.append(prev)
    return r

def bdiff(a, b):
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        s = "".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    bin = []
    p = [0]
    for i in a:
        p.append(p[-1] + len(i))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        s = "".join(b[lb:bm])
        if am > la or s:
            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return "".join(bin)

def blocks(a, b):
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]

def fixws(text, allws):
    if allws:
        text = re.sub('[ \t\r]+', '', text)
    else:
        text = re.sub('[ \t\r]+', ' ', text)
        text = text.replace(' \n', '\n')
    return text