mercurial/pure/mpatch.py
author Martin Geisler <mg@daimi.au.dk>
Sat, 24 Jan 2009 00:12:17 +0100
changeset 7699 fac054f84600
child 7775 5280c39778b6
permissions -rw-r--r--
pure Python implementation of mpatch.c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7699
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# mpatch.py - Python implementation of mpatch.c
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     3
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     5
# This software may be used and distributed according to the terms
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     6
# of the GNU General Public License, incorporated herein by reference.
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     8
import struct, mmap
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     9
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    10
devzero = file("/dev/zero")
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    11
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    12
# This attempts to apply a series of patches in time proportional to
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    13
# the total size of the patches, rather than patches * len(text). This
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    14
# means rather than shuffling strings around, we shuffle around
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    15
# pointers to fragments with fragment lists.
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    16
#
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    17
# When the fragment lists get too long, we collapse them. To do this
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    18
# efficiently, we do all our operations inside a buffer created by
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    19
# mmap and simply use memmove. This avoids creating a bunch of large
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    20
# temporary string buffers.
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    21
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    22
def patches(a, bins):
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    23
    if not bins: return a
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    24
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    25
    plens = [len(x) for x in bins]
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    26
    pl = sum(plens)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    27
    bl = len(a) + pl
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    28
    tl = bl + bl + pl # enough for the patches and two working texts
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    29
    b1, b2 = 0, bl
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    30
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    31
    if not tl: return a
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    32
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    33
    m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    34
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
    # load our original text
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    36
    m.write(a)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    37
    frags = [(len(a), b1)]
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    38
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    39
    # copy all the patches into our segment so we can memmove from them
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
    pos = b2 + bl
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    41
    m.seek(pos)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    42
    for p in bins: m.write(p)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    44
    def pull(dst, src, l): # pull l bytes from src
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    45
        while l:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    46
            f = src.pop(0)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    47
            if f[0] > l: # do we need to split?
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
                src.insert(0, (f[0] - l, f[1] + l))
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
                dst.append((l, f[1]))
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    50
                return
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    51
            dst.append(f)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    52
            l -= f[0]
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    53
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    54
    def collect(buf, list):
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    55
        start = buf
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    56
        for l, p in list:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    57
            m.move(buf, p, l)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    58
            buf += l
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    59
        return (buf - start, start)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    60
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    61
    for plen in plens:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    62
        # if our list gets too long, execute it
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    63
        if len(frags) > 128:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    64
            b2, b1 = b1, b2
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    65
            frags = [collect(b1, frags)]
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    66
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    67
        new = []
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    68
        end = pos + plen
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    69
        last = 0
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    70
        while pos < end:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    71
            p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    72
            pull(new, frags, p1 - last) # what didn't change
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    73
            pull([], frags, p2 - p1)    # what got deleted
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    74
            new.append((l, pos + 12))        # what got added
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    75
            pos += l + 12
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    76
            last = p2
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    77
        frags = new + frags                    # what was left at the end
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    78
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    79
    t = collect(b2, frags)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    80
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    81
    return m[t[1]:t[1] + t[0]]
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    82
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    83
def patchedsize(orig, delta):
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    84
    outlen, last, bin = 0, 0, 0
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    85
    binend = len(delta)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    86
    data = 12
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    87
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    88
    while data <= binend:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    89
        decode = delta[bin:bin + 12]
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    90
        start, end, length = struct.unpack(">lll", decode)
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    91
        if start > end:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    92
            break
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    93
        bin = data + length
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    94
        data = bin + 12
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    95
        outlen += start - last
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    96
        last = end
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    97
        outlen += length
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    98
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    99
    if bin != binend:
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   100
        raise Exception("patch cannot be decoded")
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   101
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   102
    outlen += orig - last
fac054f84600 pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   103
    return outlen