author | Martin Geisler <mg@daimi.au.dk> |
Thu, 29 Jan 2009 19:25:25 +0100 | |
changeset 7721 | a1138f437640 |
parent 7699 | fac054f84600 |
child 7775 | 5280c39778b6 |
permissions | -rw-r--r-- |
7699
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
1 |
# mpatch.py - Python implementation of mpatch.c |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
2 |
# |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
3 |
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
4 |
# |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
5 |
# This software may be used and distributed according to the terms |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
6 |
# of the GNU General Public License, incorporated herein by reference. |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
7 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
8 |
import struct, mmap |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
9 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
10 |
devzero = file("/dev/zero") |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
11 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
12 |
# This attempts to apply a series of patches in time proportional to |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
13 |
# the total size of the patches, rather than patches * len(text). This |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
14 |
# means rather than shuffling strings around, we shuffle around |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
15 |
# pointers to fragments with fragment lists. |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
16 |
# |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
17 |
# When the fragment lists get too long, we collapse them. To do this |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
18 |
# efficiently, we do all our operations inside a buffer created by |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
19 |
# mmap and simply use memmove. This avoids creating a bunch of large |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
20 |
# temporary string buffers. |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
21 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
22 |
def patches(a, bins): |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
23 |
if not bins: return a |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
24 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
25 |
plens = [len(x) for x in bins] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
26 |
pl = sum(plens) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
27 |
bl = len(a) + pl |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
28 |
tl = bl + bl + pl # enough for the patches and two working texts |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
29 |
b1, b2 = 0, bl |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
30 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
31 |
if not tl: return a |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
32 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
33 |
m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
34 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
35 |
# load our original text |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
36 |
m.write(a) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
37 |
frags = [(len(a), b1)] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
38 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
39 |
# copy all the patches into our segment so we can memmove from them |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
40 |
pos = b2 + bl |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
41 |
m.seek(pos) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
42 |
for p in bins: m.write(p) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
43 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
44 |
def pull(dst, src, l): # pull l bytes from src |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
45 |
while l: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
46 |
f = src.pop(0) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
47 |
if f[0] > l: # do we need to split? |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
48 |
src.insert(0, (f[0] - l, f[1] + l)) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
49 |
dst.append((l, f[1])) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
50 |
return |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
51 |
dst.append(f) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
52 |
l -= f[0] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
53 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
54 |
def collect(buf, list): |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
55 |
start = buf |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
56 |
for l, p in list: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
57 |
m.move(buf, p, l) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
58 |
buf += l |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
59 |
return (buf - start, start) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
60 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
61 |
for plen in plens: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
62 |
# if our list gets too long, execute it |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
63 |
if len(frags) > 128: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
64 |
b2, b1 = b1, b2 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
65 |
frags = [collect(b1, frags)] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
66 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
67 |
new = [] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
68 |
end = pos + plen |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
69 |
last = 0 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
70 |
while pos < end: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
71 |
p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
72 |
pull(new, frags, p1 - last) # what didn't change |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
73 |
pull([], frags, p2 - p1) # what got deleted |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
74 |
new.append((l, pos + 12)) # what got added |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
75 |
pos += l + 12 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
76 |
last = p2 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
77 |
frags = new + frags # what was left at the end |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
78 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
79 |
t = collect(b2, frags) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
80 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
81 |
return m[t[1]:t[1] + t[0]] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
82 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
83 |
def patchedsize(orig, delta): |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
84 |
outlen, last, bin = 0, 0, 0 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
85 |
binend = len(delta) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
86 |
data = 12 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
87 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
88 |
while data <= binend: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
89 |
decode = delta[bin:bin + 12] |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
90 |
start, end, length = struct.unpack(">lll", decode) |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
91 |
if start > end: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
92 |
break |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
93 |
bin = data + length |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
94 |
data = bin + 12 |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
95 |
outlen += start - last |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
96 |
last = end |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
97 |
outlen += length |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
98 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
99 |
if bin != binend: |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
100 |
raise Exception("patch cannot be decoded") |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
101 |
|
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
102 |
outlen += orig - last |
fac054f84600
pure Python implementation of mpatch.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
103 |
return outlen |