Mercurial > hg
changeset 1199:78ceaf83f28f
Created a class in util called chunkbuffer that buffers reads from an
iterator over strings (aka chunks).
Also added to util (for future use) is a generator function that
iterates over a file n bytes at a time.
Lastly, localrepo was changed to use this new chunkbuffer class when
reading changegroups from the local repository.
author | Eric Hopper <hopper@omnifarious.org> |
---|---|
date | Sun, 04 Sep 2005 14:11:51 -0700 |
parents | 66f7d3946109 |
children | 333de1d53846 |
files | mercurial/localrepo.py mercurial/util.py |
diffstat | 2 files changed, 70 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/localrepo.py Sat Sep 03 23:52:39 2005 -0700 +++ b/mercurial/localrepo.py Sun Sep 04 14:11:51 2005 -0700 @@ -888,21 +888,7 @@ return remote.addchangegroup(cg) def changegroup(self, basenodes): - class genread: - def __init__(self, generator): - self.g = generator - self.buf = "" - def fillbuf(self): - self.buf += "".join(self.g) - - def read(self, l): - while l > len(self.buf): - try: - self.buf += self.g.next() - except StopIteration: - break - d, self.buf = self.buf[:l], self.buf[l:] - return d + genread = util.chunkbuffer def gengroup(): nodes = self.newer(basenodes)
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter, targetsize=2**16):
        """in_iter is the iterator that's iterating over the input chunks.
        targetsize is how big a buffer to try to maintain (in bytes);
        reads are rounded up to a multiple of it.

        Raises ValueError if targetsize is not a positive integer."""
        self.in_iter = iter(in_iter)
        self.buf = ''
        targetsize = int(targetsize)
        if targetsize <= 0:
            raise ValueError("targetsize must be greater than 0, was %d"
                             % targetsize)
        self.targetsize = targetsize
        # set once the underlying iterator has been fully consumed
        self.iterempty = False

    def fillbuf(self):
        """x.fillbuf()

        Ignore the target size, and just read every chunk from the iterator
        until it's empty."""
        if not self.iterempty:
            # single ''.join over all chunks is O(total size); repeated
            # '+=' (or the original cStringIO dance) gains nothing here
            self.buf = ''.join([self.buf] + list(self.in_iter))
            self.iterempty = True

    def read(self, l):
        """x.read(l) -> str
        Read l bytes of data from the iterator of chunks of data.
        Returns less than l bytes if the iterator runs dry."""
        if l > len(self.buf) and not self.iterempty:
            # Round the request up to a multiple of targetsize so we do
            # not wake the iterator for every tiny read.
            targetsize = self.targetsize * ((l // self.targetsize) + 1)
            collector = [self.buf]
            collected = len(self.buf)
            for chunk in self.in_iter:
                collector.append(chunk)
                collected += len(chunk)
                if collected >= targetsize:
                    break
            if collected < targetsize:
                # the for loop fell off the end: iterator is exhausted
                self.iterempty = True
            self.buf = ''.join(collector)
        # plain slicing replaces the Python-2-only buffer() view the
        # original kept; slicing a str past its end yields '' just as
        # an over-long buffer offset did, so behavior is identical
        s, self.buf = self.buf[:l], self.buf[l:]
        return s

    def __repr__(self):
        return "<%s.%s targetsize = %u buffered = %u bytes>" % \
               (self.__class__.__module__, self.__class__.__name__,
                self.targetsize, len(self.buf))

def filechunkiter(f, size=65536):
    """filechunkiter(file[, size]) -> generator

    Create a generator that produces all the data in the file size (default
    65536) bytes at a time. Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or some
    other type of file that sometimes reads less data than is requested.

    Bug fix: the original loop condition was 'len(s) >= 0', which is
    always true, so the generator yielded empty strings forever once the
    file was exhausted (an infinite loop for any consumer draining it).
    Stop as soon as read() returns an empty string."""
    s = f.read(size)
    while s:
        yield s
        s = f.read(size)