changeset 5539:954e68e54dea

convert: read CVS files in chunks (issue 800). socket.makefile() fails on large read requests (more than 10MB) with MemoryError.
author Patrick Mezard <pmezard@gmail.com>
date Sun, 18 Nov 2007 17:25:28 +0100
parents dc8fa3482a9a
children 00b812ad67cb
files hgext/convert/cvs.py
diffstat 1 files changed, 17 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/cvs.py	Sat Nov 17 18:41:31 2007 +0100
+++ b/hgext/convert/cvs.py	Sun Nov 18 17:25:28 2007 +0100
@@ -1,6 +1,7 @@
 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
 
 import os, locale, re, socket
+from cStringIO import StringIO
 from mercurial import util
 
 from common import NoRepo, commit, converter_source, checktool
@@ -209,6 +210,20 @@
         return self.heads
 
     def _getfile(self, name, rev):
+
+        def chunkedread(fp, count):
+            # file-objects returned by socket.makefile() do not handle
+            # large read() requests very well.
+            chunksize = 65536
+            output = StringIO()
+            while count > 0:
+                data = fp.read(min(count, chunksize))
+                if not data:
+                    raise util.Abort("%d bytes missing from remote file" % count)
+                count -= len(data)
+                output.write(data)
+            return output.getvalue()
+
         if rev.endswith("(DEAD)"):
             raise IOError
 
@@ -227,14 +242,14 @@
                 self.readp.readline() # entries
                 mode = self.readp.readline()[:-1]
                 count = int(self.readp.readline()[:-1])
-                data = self.readp.read(count)
+                data = chunkedread(self.readp, count)
             elif line.startswith(" "):
                 data += line[1:]
             elif line.startswith("M "):
                 pass
             elif line.startswith("Mbinary "):
                 count = int(self.readp.readline()[:-1])
-                data = self.readp.read(count)
+                data = chunkedread(self.readp, count)
             else:
                 if line == "ok\n":
                     return (data, "x" in mode and "x" or "")