--- a/mercurial/mdiff.py Sat May 14 10:13:49 2005 -0800
+++ b/mercurial/mdiff.py Sat May 14 10:27:14 2005 -0800
@@ -1,6 +1,7 @@
#!/usr/bin/python
-import difflib, struct
-from cStringIO import StringIO
+import difflib, struct, mmap
+
+devzero = file("/dev/zero")
def unidiff(a, ad, b, bd, fn):
if not a and not b: return ""
@@ -52,6 +53,74 @@
return "".join(bin)
+# This attempts to apply a series of patches in time proportional to
+# the total size of the patches, rather than patches * len(text). This
+# means rather than shuffling strings around, we shuffle around
+# pointers to fragments with fragment lists.
+#
+# When the fragment lists get too long, we collapse them. To do this
+# efficiently, we do all our operations inside a buffer created by
+# mmap and simply use memmove. This avoids creating a bunch of large
+# temporary string buffers.
+
+def patches(a, bins):
+ if not bins: return a
+
+ plens = [len(x) for x in bins]
+ pl = sum(plens)
+ bl = len(a) + pl
+ tl = bl + bl + pl # enough for the patches and two working texts
+ b1, b2 = 0, bl
+ m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
+
+ # load our original text
+ m.write(a)
+ frags = [(len(a), b1)]
+
+ # copy all the patches into our segment so we can memmove from them
+ pos = b2 + bl
+ m.seek(pos)
+ for p in bins: m.write(p)
+
+ def pull(dst, src, l): # pull l bytes from src
+ while l:
+ f = src.pop(0)
+ if f[0] > l: # do we need to split?
+ src.insert(0, (f[0] - l, f[1] + l))
+ dst.append((l, f[1]))
+ return
+ dst.append(f)
+ l -= f[0]
+
+ def collect(buf, list):
+ start = buf
+ for l, p in list:
+ m.move(buf, p, l)
+ buf += l
+ return (buf - start, start)
+
+ for plen in plens:
+ # if our list gets too long, execute it
+ if len(frags) > 128:
+ b2, b1 = b1, b2
+ frags = [collect(b1, frags)]
+
+ new = []
+ end = pos + plen
+ last = 0
+ while pos < end:
+ p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
+ pull(new, frags, p1 - last) # what didn't change
+ pull([], frags, p2 - p1) # what got deleted
+ new.append((l, pos + 12)) # what got added
+ pos += l + 12
+ last = p2
+ frags = new + frags # what was left at the end
+
+ t = collect(b2, frags)
+
+ return m[t[1]:t[1] + t[0]]
+
def patch(a, bin):
last = pos = 0
r = []
--- a/mercurial/revlog.py Sat May 14 10:13:49 2005 -0800
+++ b/mercurial/revlog.py Sat May 14 10:27:14 2005 -0800
@@ -41,7 +41,7 @@
n = 0
i = self.opener(self.indexfile).read()
s = struct.calcsize(indexformat)
- for f in range(0, len(i), s):
+ for f in xrange(0, len(i), s):
# offset, size, base, linkrev, p1, p2, nodeid
e = struct.unpack(indexformat, i[f:f + s])
self.nodemap[e[6]] = n
@@ -87,9 +87,6 @@
def diff(self, a, b):
return mdiff.textdiff(a, b)
- def patch(self, text, patch):
- return mdiff.patch(text, patch)
-
def revision(self, node):
if node == nullid: return ""
if self.cache and self.cache[0] == node: return self.cache[2]
@@ -114,12 +111,14 @@
last = self.length(base)
text = decompress(data[:last])
+ bins = []
for r in xrange(base + 1, rev + 1):
s = self.length(r)
- b = decompress(data[last:last + s])
- text = self.patch(text, b)
+ bins.append(decompress(data[last:last + s]))
last = last + s
+ text = mdiff.patches(text, bins)
+
(p1, p2) = self.parents(node)
if node != hash(text, p1, p2):
raise "integrity check failed on %s:%d" % (self.datafile, rev)
@@ -301,14 +300,12 @@
# helper to reconstruct intermediate versions
def construct(text, base, rev):
- for r in range(base + 1, rev + 1):
- b = decompress(chunks[r])
- text = self.patch(text, b)
- return text
+ bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
+ return mdiff.patches(text, bins)
# build deltas
deltas = []
- for d in range(0, len(revs) - 1):
+ for d in xrange(0, len(revs) - 1):
a, b = revs[d], revs[d + 1]
n = self.node(b)