Mercurial > hg-stable
changeset 7699:fac054f84600
pure Python implementation of mpatch.c
author | Martin Geisler <mg@daimi.au.dk> |
---|---|
date | Sat, 24 Jan 2009 00:12:17 +0100 |
parents | 934f0667f6fa |
children | f410c552fa15 |
files | mercurial/pure/mpatch.py |
diffstat | 1 files changed, 103 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/pure/mpatch.py Sat Jan 24 00:12:17 2009 +0100 @@ -0,0 +1,103 @@ +# mpatch.py - Python implementation of mpatch.c +# +# Copyright 2009 Matt Mackall <mpm@selenic.com> and others +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +import struct, mmap + +devzero = file("/dev/zero") + +# This attempts to apply a series of patches in time proportional to +# the total size of the patches, rather than patches * len(text). This +# means rather than shuffling strings around, we shuffle around +# pointers to fragments with fragment lists. +# +# When the fragment lists get too long, we collapse them. To do this +# efficiently, we do all our operations inside a buffer created by +# mmap and simply use memmove. This avoids creating a bunch of large +# temporary string buffers. + +def patches(a, bins): + if not bins: return a + + plens = [len(x) for x in bins] + pl = sum(plens) + bl = len(a) + pl + tl = bl + bl + pl # enough for the patches and two working texts + b1, b2 = 0, bl + + if not tl: return a + + m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) + + # load our original text + m.write(a) + frags = [(len(a), b1)] + + # copy all the patches into our segment so we can memmove from them + pos = b2 + bl + m.seek(pos) + for p in bins: m.write(p) + + def pull(dst, src, l): # pull l bytes from src + while l: + f = src.pop(0) + if f[0] > l: # do we need to split? + src.insert(0, (f[0] - l, f[1] + l)) + dst.append((l, f[1])) + return + dst.append(f) + l -= f[0] + + def collect(buf, list): + start = buf + for l, p in list: + m.move(buf, p, l) + buf += l + return (buf - start, start) + + for plen in plens: + # if our list gets too long, execute it + if len(frags) > 128: + b2, b1 = b1, b2 + frags = [collect(b1, frags)] + + new = [] + end = pos + plen + last = 0 + while pos < end: + p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) + pull(new, frags, p1 - last) # what didn't change + pull([], frags, p2 - p1) # what got deleted + new.append((l, pos + 12)) # what got added + pos += l + 12 + last = p2 + frags = new + frags # what was left at the end + + t = collect(b2, frags) + + return m[t[1]:t[1] + t[0]] + +def patchedsize(orig, delta): + outlen, last, bin = 0, 0, 0 + binend = len(delta) + data = 12 + + while data <= binend: + decode = delta[bin:bin + 12] + start, end, length = struct.unpack(">lll", decode) + if start > end: + break + bin = data + length + data = bin + 12 + outlen += start - last + last = end + outlen += length + + if bin != binend: + raise Exception("patch cannot be decoded") + + outlen += orig - last + return outlen