Mercurial > hg
view mercurial/pure/mpatch.py @ 8849:80cc4b1a62d0
compare grep result between target and its parent
I found that a typical case is one where the grep target is added at the
(*) revision in the tree shown below.
+--- 1(*) --- 3
0
+--- 2 ------ 4
Now, I expect 'hg grep --all' to show only rev:1, which is the first
appearance of the target line.
But 'hg grep --all' will tell:
target line dis-appeared at 3 => 4
target line appeared at 2 => 3
target line dis-appeared at 1 => 2
target line appeared at 0 => 1
because the current 'hg grep' implementation does not compare the target
revision with its parent, but compares neighboring revisions in walkthrough
order.
I checked performance of this patch by "hg grep --follow --all
walkchangerevs" on whole Mercurial repo, and patched version could
complete as fast as un-patched one.
author | FUJIWARA Katsunori <foozy@lares.dti.ne.jp> |
---|---|
date | Tue, 19 May 2009 16:49:54 +0900 |
parents | 46293a0c7e9f |
children | 25e572394f5c |
line wrap: on
line source
# mpatch.py - Python implementation of mpatch.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

import struct
from collections import deque

try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Python 3 has neither legacy module; the working buffer holds
        # binary data, so io.BytesIO is the equivalent there.
        from io import BytesIO as StringIO

# This attempts to apply a series of patches in time proportional to
# the total size of the patches, rather than patches * len(text). This
# means rather than shuffling strings around, we shuffle around
# pointers to fragments with fragment lists.
#
# When the fragment lists get too long, we collapse them. To do this
# efficiently, we do all our operations inside a buffer created by
# mmap and simply use memmove. This avoids creating a bunch of large
# temporary string buffers.


def patches(a, bins):
    """Apply the binary patches in bins, in order, to the text a.

    Each patch is a sequence of hunks. A hunk is a 12-byte header of three
    big-endian signed 32-bit integers (start, end, length) followed by
    length bytes of data that replace the range [start, end) of the text
    the patch is applied to.

    Returns the patched text. a itself is returned when bins is empty or
    when a and every patch are empty.

    Raises Exception("patch cannot be decoded") when a hunk has a negative
    length, an inverted range (start > end) or starts before the end of
    the previous hunk. A truncated header surfaces as struct.error and a
    hunk reaching past the end of the text as IndexError.
    """
    if not bins:
        return a

    plens = [len(x) for x in bins]
    pl = sum(plens)
    bl = len(a) + pl
    tl = bl + bl + pl  # enough for the patches and two working texts
    b1, b2 = 0, bl

    if not tl:
        return a

    m = StringIO()

    def move(dest, src, count):
        """move count bytes from src to dest

        The file pointer is left at the end of dest.
        """
        m.seek(src)
        buf = m.read(count)
        m.seek(dest)
        m.write(buf)

    # load our original text
    m.write(a)
    # fragments are (length, offset into m) pairs; a deque keeps taking
    # fragments off the front O(1) however many hunks a patch has
    frags = deque([(len(a), b1)])

    # copy all the patches into our segment so we can memmove from them
    pos = b2 + bl
    m.seek(pos)
    for p in bins:
        m.write(p)

    def pull(dst, src, l):
        # pull l bytes from the front of src, appending them to dst
        while l:
            f = src.popleft()
            if f[0] > l:  # do we need to split?
                src.appendleft((f[0] - l, f[1] + l))
                dst.append((l, f[1]))
                return
            dst.append(f)
            l -= f[0]

    def collect(buf, fragments):
        # copy every fragment, in order, to a contiguous run starting at
        # buf and return that run as a single (length, offset) fragment
        start = buf
        for l, p in fragments:
            move(buf, p, l)
            buf += l
        return (buf - start, start)

    for plen in plens:
        # if our list gets too long, execute it
        if len(frags) > 128:
            b2, b1 = b1, b2
            frags = deque([collect(b1, frags)])

        new = []
        end = pos + plen
        last = 0
        while pos < end:
            m.seek(pos)
            p1, p2, l = struct.unpack(">lll", m.read(12))
            # A negative length would stop pos from advancing (endless
            # loop); inverted or out-of-order ranges would make pull()
            # produce negative-length fragments.
            if p1 < last or p2 < p1 or l < 0:
                raise Exception("patch cannot be decoded")
            pull(new, frags, p1 - last)  # what didn't change
            pull([], frags, p2 - p1)  # what got deleted
            new.append((l, pos + 12))  # what got added
            pos += l + 12
            last = p2
        # new fragments go in front of what was left at the end
        frags.extendleft(reversed(new))

    t = collect(b2, frags)

    m.seek(t[1])
    return m.read(t[0])


def patchedsize(orig, delta):
    """Return the length of the text that delta produces from a text of
    length orig, without applying the patch.

    delta uses the same hunk layout that patches() consumes: a 12-byte
    header of three big-endian signed 32-bit integers (start, end, length)
    followed by length bytes of data.

    Raises Exception("patch cannot be decoded") when the hunks do not end
    exactly at the end of delta, or when a hunk has start > end or a
    negative length.
    """
    outlen, last, pos = 0, 0, 0
    binend = len(delta)
    data = 12  # offset just past the header of the hunk at pos

    while data <= binend:
        start, end, length = struct.unpack(">lll", delta[pos:pos + 12])
        # A negative length could move the cursor backwards and loop
        # forever; breaking here always reaches the error below because
        # pos is still short of binend.
        if start > end or length < 0:
            break
        pos = data + length
        data = pos + 12
        outlen += start - last
        last = end
        outlen += length

    if pos != binend:
        raise Exception("patch cannot be decoded")

    outlen += orig - last
    return outlen