changeset 11758:a79214972da2 stable
chunkbuffer: use += rather than cStringIO to reduce memory footprint
This significantly refactors the read() loop to use a queue of chunks.
The queue is alternately filled to at least 256k and then emptied by
concatenating onto the output buffer.
For very large read sizes, += uses less memory because it can resize
the target string in place.
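The in-place resize is a CPython implementation detail: when the string on the left of += holds the only reference, the interpreter can realloc its buffer rather than copy the prefix, so building an N-byte result from chunks is amortized O(N). The old code also paid an extra copy, since collector.getvalue() duplicated the collected bytes while the cStringIO buffer was still alive. A minimal sketch of the += behaviour (illustration only, not part of the changeset; timings vary by interpreter):

import time

def build(chunks):
    buf = ''
    for chunk in chunks:
        # CPython resizes buf in place when it holds the sole reference,
        # so this loop is amortized O(total size), not O(n^2)
        buf += chunk
    return buf

chunks = ['x' * 1024] * 16384        # 16 MB in 1 KB pieces
start = time.time()
result = build(chunks)
print('%d bytes in %.3fs' % (len(result), time.time() - start))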
author      Matt Mackall <mpm@selenic.com>
date        Fri, 06 Aug 2010 12:18:33 -0500
parents     65bd4b8e48bd
children    05deba16c5d5 aff419e260f9
files       mercurial/util.py
diffstat    1 files changed, 26 insertions(+), 21 deletions(-)
--- a/mercurial/util.py	Thu Aug 05 16:17:39 2010 -0500
+++ b/mercurial/util.py	Fri Aug 06 12:18:33 2010 -0500
@@ -15,7 +15,7 @@
 
 from i18n import _
 import error, osutil, encoding
-import cStringIO, errno, re, shutil, sys, tempfile, traceback
+import errno, re, shutil, sys, tempfile, traceback
 import os, stat, time, calendar, textwrap, unicodedata, signal
 import imp
 
@@ -909,31 +909,36 @@
         """in_iter is the iterator that's iterating over the input chunks.
         targetsize is how big a buffer to try to maintain."""
         self.iter = iter(in_iter)
-        self.buf = ''
-        self.targetsize = 2**16
+        self._queue = []
 
     def read(self, l):
         """Read L bytes of data from the iterator of chunks of data.
         Returns less than L bytes if the iterator runs dry."""
-        if l > len(self.buf) and self.iter:
-            # Clamp to a multiple of self.targetsize
-            targetsize = max(l, self.targetsize)
-            collector = cStringIO.StringIO()
-            collector.write(self.buf)
-            collected = len(self.buf)
-            for chunk in self.iter:
-                collector.write(chunk)
-                collected += len(chunk)
-                if collected >= targetsize:
+        left = l
+        buf = ''
+        queue = self._queue
+        while left > 0:
+            # refill the queue
+            if not queue:
+                target = 2**18
+                for chunk in self.iter:
+                    queue.append(chunk)
+                    target -= len(chunk)
+                    if target <= 0:
+                        break
+                if not queue:
                     break
-            if collected < targetsize:
-                self.iter = False
-            self.buf = collector.getvalue()
-        if len(self.buf) == l:
-            s, self.buf = str(self.buf), ''
-        else:
-            s, self.buf = self.buf[:l], buffer(self.buf, l)
-        return s
+
+            chunk = queue.pop(0)
+            left -= len(chunk)
+            if left < 0:
+                queue.insert(0, chunk[left:])
+                buf += chunk[:left]
+            else:
+                buf += chunk
+
+        return buf
+
 def filechunkiter(f, size=65536, limit=None):
     """Create a generator that produces the data in the file size
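To see the refill-and-drain behaviour in isolation, the new read() can be lifted out of mercurial/util.py into a standalone class; the driver at the bottom is a made-up example, not part of the changeset:

class chunkbuffer(object):
    """Serve reads of l bytes from an iterator of variable-sized chunks,
    keeping a queue of pending chunks refilled to at least 2**18 bytes."""

    def __init__(self, in_iter):
        self.iter = iter(in_iter)
        self._queue = []

    def read(self, l):
        left = l
        buf = ''
        queue = self._queue
        while left > 0:
            # refill the queue to roughly 256k before draining it
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break
            chunk = queue.pop(0)
            left -= len(chunk)
            if left < 0:
                # chunk overshoots the request: consume the head,
                # push the unread tail back onto the front of the queue
                queue.insert(0, chunk[left:])
                buf += chunk[:left]
            else:
                buf += chunk
        return buf

cb = chunkbuffer(iter(['abc', 'defgh', 'ij']))
assert cb.read(4) == 'abcd'      # splits 'defgh'; 'efgh' goes back on the queue
assert cb.read(100) == 'efghij'  # iterator runs dry, so the read comes up short
assert cb.read(1) == ''          # exhausted: further reads return ''

Because buf is a local with a single reference, each buf += chunk can grow the string in place under CPython, which is exactly the property the commit message relies on.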