Created a class in util called chunkbuffer that buffers reads from an
iterator over strings (chunks).
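
For illustration, a rough usage sketch (the chunk source here is made
up for the example):

    from mercurial import util

    chunks = iter(["some", "chunked", "data"])
    buf = util.chunkbuffer(chunks)
    while True:
        d = buf.read(4096)
        if not d:
            break
        # d now holds up to 4096 bytes assembled from the chunks
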
Also added to util (for future use) is filechunkiter, a generator
function that iterates over a file n bytes at a time.
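
For instance, copying a file 64k bytes at a time (a sketch; the file
names are hypothetical):

    from mercurial import util

    src = open("input.bin", "rb")
    dst = open("output.bin", "wb")
    for chunk in util.filechunkiter(src):
        dst.write(chunk)
    dst.close()
    src.close()
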
Lastly, localrepo was changed to use this new chunkbuffer class when
reading changegroups from the local repository.
--- a/mercurial/localrepo.py Sat Sep 03 23:52:39 2005 -0700
+++ b/mercurial/localrepo.py Sun Sep 04 14:11:51 2005 -0700
@@ -888,21 +888,7 @@
return remote.addchangegroup(cg)
def changegroup(self, basenodes):
- class genread:
- def __init__(self, generator):
- self.g = generator
- self.buf = ""
- def fillbuf(self):
- self.buf += "".join(self.g)
-
- def read(self, l):
- while l > len(self.buf):
- try:
- self.buf += self.g.next()
- except StopIteration:
- break
- d, self.buf = self.buf[:l], self.buf[l:]
- return d
+ genread = util.chunkbuffer
def gengroup():
nodes = self.newer(basenodes)
--- a/mercurial/util.py Sat Sep 03 23:52:39 2005 -0700
+++ b/mercurial/util.py Sun Sep 04 14:11:51 2005 -0700
@@ -12,7 +12,7 @@
import os, errno
from demandload import *
-demandload(globals(), "re")
+demandload(globals(), "re cStringIO")
def binary(s):
"""return true if a string is binary data using diff's heuristic"""
@@ -352,3 +352,71 @@
val = os.WSTOPSIG(code)
return "stopped by signal %d" % val, val
raise ValueError("invalid exit code")
+
+class chunkbuffer(object):
+ """Allow arbitrary sized chunks of data to be efficiently read from an
+ iterator over chunks of arbitrary size."""
+    def __init__(self, in_iter, targetsize=2**16):
+ """in_iter is the iterator that's iterating over the input chunks.
+ targetsize is how big a buffer to try to maintain."""
+ self.in_iter = iter(in_iter)
+ self.buf = ''
+ targetsize = int(targetsize)
+        if targetsize <= 0:
+            raise ValueError("targetsize must be greater than 0, was %d"
+                             % targetsize)
+        self.targetsize = targetsize
+ self.iterempty = False
+ def fillbuf(self):
+ """x.fillbuf()
+
+ Ignore the target size, and just read every chunk from the iterator
+ until it's empty."""
+ if not self.iterempty:
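+            # collect into a cStringIO rather than repeatedly
+            # concatenating strings, which would be quadratic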
+ collector = cStringIO.StringIO()
+ collector.write(self.buf)
+ for ch in self.in_iter:
+ collector.write(ch)
+ self.buf = collector.getvalue()
+ collector.close()
+ collector = None
+ self.iterempty = True
+
+ def read(self, l):
+ """x.read(l) -> str
+ Read l bytes of data from the iterator of chunks of data. Returns less
+ than l bytes if the iterator runs dry."""
+ if l > len(self.buf) and not self.iterempty:
+            # round l up to the next multiple of self.targetsize
+ targetsize = self.targetsize * ((l // self.targetsize) + 1)
+ collector = cStringIO.StringIO()
+ collector.write(self.buf)
+ collected = len(self.buf)
+ for chunk in self.in_iter:
+ collector.write(chunk)
+ collected += len(chunk)
+ if collected >= targetsize:
+ break
+ if collected < targetsize:
+ self.iterempty = True
+ self.buf = collector.getvalue()
+ collector.close()
+ collector = None
+ s = self.buf[:l]
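+        # keep the remainder as a zero-copy buffer() view rather than
+        # slicing out a new string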
+ self.buf = buffer(self.buf, l)
+ return s
+ def __repr__(self):
+ return "<%s.%s targetsize = %u buffered = %u bytes>" % \
+ (self.__class__.__module__, self.__class__.__name__,
+ self.targetsize, len(self.buf))
+
+def filechunkiter(f, size=65536):
+ """filechunkiter(file[, size]) -> generator
+
+    Create a generator that yields the data in the file, size (default
+    65536) bytes at a time.  Chunks may be less than size bytes if the
+    chunk is the last chunk in the file, or if the file is a socket or
+    some other file type that sometimes reads less data than requested."""
+ s = f.read(size)
+    while s:
+ yield s
+ s = f.read(size)