# HG changeset patch # User Eric Hopper # Date 1125868311 25200 # Node ID 78ceaf83f28fb949c181868fccecb114c7eb09ca # Parent 66f7d39461090a45569d5d657ca6a961d939312d Created a class in util called chunkbuffer that buffers reads from an iterator over strings (aka chunks). Also added to util (for future use) is a generator function that iterates over a file n bytes at a time. Lastly, localrepo was changed to use this new chunkbuffer class when reading changegroups form the local repository. diff -r 66f7d3946109 -r 78ceaf83f28f mercurial/localrepo.py --- a/mercurial/localrepo.py Sat Sep 03 23:52:39 2005 -0700 +++ b/mercurial/localrepo.py Sun Sep 04 14:11:51 2005 -0700 @@ -888,21 +888,7 @@ return remote.addchangegroup(cg) def changegroup(self, basenodes): - class genread: - def __init__(self, generator): - self.g = generator - self.buf = "" - def fillbuf(self): - self.buf += "".join(self.g) - - def read(self, l): - while l > len(self.buf): - try: - self.buf += self.g.next() - except StopIteration: - break - d, self.buf = self.buf[:l], self.buf[l:] - return d + genread = util.chunkbuffer def gengroup(): nodes = self.newer(basenodes) diff -r 66f7d3946109 -r 78ceaf83f28f mercurial/util.py --- a/mercurial/util.py Sat Sep 03 23:52:39 2005 -0700 +++ b/mercurial/util.py Sun Sep 04 14:11:51 2005 -0700 @@ -12,7 +12,7 @@ import os, errno from demandload import * -demandload(globals(), "re") +demandload(globals(), "re cStringIO") def binary(s): """return true if a string is binary data using diff's heuristic""" @@ -352,3 +352,71 @@ val = os.WSTOPSIG(code) return "stopped by signal %d" % val, val raise ValueError("invalid exit code") + +class chunkbuffer(object): + """Allow arbitrary sized chunks of data to be efficiently read from an + iterator over chunks of arbitrary size.""" + def __init__(self, in_iter, targetsize = 2**16): + """in_iter is the iterator that's iterating over the input chunks. + targetsize is how big a buffer to try to maintain.""" + self.in_iter = iter(in_iter) + self.buf = '' + targetsize = int(targetsize) + if (targetsize <= 0): + raise ValueError("targetsize must be greater than 0, was %d" % targetsize) + self.targetsize = int(targetsize) + self.iterempty = False + def fillbuf(self): + """x.fillbuf() + + Ignore the target size, and just read every chunk from the iterator + until it's empty.""" + if not self.iterempty: + collector = cStringIO.StringIO() + collector.write(self.buf) + for ch in self.in_iter: + collector.write(ch) + self.buf = collector.getvalue() + collector.close() + collector = None + self.iterempty = True + + def read(self, l): + """x.read(l) -> str + Read l bytes of data from the iterator of chunks of data. Returns less + than l bytes if the iterator runs dry.""" + if l > len(self.buf) and not self.iterempty: + # Clamp to a multiple of self.targetsize + targetsize = self.targetsize * ((l // self.targetsize) + 1) + collector = cStringIO.StringIO() + collector.write(self.buf) + collected = len(self.buf) + for chunk in self.in_iter: + collector.write(chunk) + collected += len(chunk) + if collected >= targetsize: + break + if collected < targetsize: + self.iterempty = True + self.buf = collector.getvalue() + collector.close() + collector = None + s = self.buf[:l] + self.buf = buffer(self.buf, l) + return s + def __repr__(self): + return "<%s.%s targetsize = %u buffered = %u bytes>" % \ + (self.__class__.__module__, self.__class__.__name__, + self.targetsize, len(self.buf)) + +def filechunkiter(f, size = 65536): + """filechunkiter(file[, size]) -> generator + + Create a generator that produces all the data in the file size (default + 65536) bytes at a time. Chunks may be less than size bytes if the + chunk is the last chunk in the file, or the file is a socket or some + other type of file that sometimes reads less data than is requested.""" + s = f.read(size) + while len(s) >= 0: + yield s + s = f.read(size)