changegroup: avoid large copies
author Matt Mackall <mpm@selenic.com>
Wed, 03 Oct 2007 17:17:28 -0500
changeset 5368 61462e7d62ed
parent 5367 7530334bf301
child 5371 17ed9b9a0d03
changegroup: avoid large copies

- handle chunk headers separately rather than prepending them to (potentially large) chunks
- break large chunks into 1M pieces for compression
- don't prepend file metadata onto (potentially large) file data
mercurial/changegroup.py
mercurial/localrepo.py
mercurial/revlog.py
--- a/mercurial/changegroup.py	Wed Oct 03 17:17:27 2007 -0500
+++ b/mercurial/changegroup.py	Wed Oct 03 17:17:28 2007 -0500
@@ -33,10 +33,9 @@
             break
         yield c
 
-def genchunk(data):
-    """build a changegroup chunk"""
-    header = struct.pack(">l", len(data)+ 4)
-    return "%s%s" % (header, data)
+def chunkheader(length):
+    """build a changegroup chunk header"""
+    return struct.pack(">l", length + 4)
 
 def closechunk():
     return struct.pack(">l", 0)
@@ -86,7 +85,12 @@
             empty = True
             for chunk in chunkiter(cg):
                 empty = False
-                fh.write(z.compress(genchunk(chunk)))
+                fh.write(z.compress(chunkheader(len(chunk))))
+                pos = 0
+                while pos < len(chunk):
+                    next = pos + 2**20
+                    fh.write(z.compress(chunk[pos:next]))
+                    pos = next
             fh.write(z.compress(closechunk()))
         fh.write(z.flush())
         cleanup = None
--- a/mercurial/localrepo.py	Wed Oct 03 17:17:27 2007 -0500
+++ b/mercurial/localrepo.py	Wed Oct 03 17:17:28 2007 -0500
@@ -1720,7 +1720,8 @@
                 # If any filenodes are left, generate the group for them,
                 # otherwise don't bother.
                 if len(msng_filenode_lst) > 0:
-                    yield changegroup.genchunk(fname)
+                    yield changegroup.chunkheader(len(fname))
+                    yield fname
                     # Sort the filenodes by their revision #
                     msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
                     # Create a group generator and only pass in a changenode
@@ -1796,7 +1797,8 @@
                 nodeiter = gennodelst(filerevlog)
                 nodeiter = list(nodeiter)
                 if nodeiter:
-                    yield changegroup.genchunk(fname)
+                    yield changegroup.chunkheader(len(fname))
+                    yield fname
                     lookup = lookuprevlink_func(filerevlog)
                     for chnk in filerevlog.group(nodeiter, lookup):
                         yield chnk
--- a/mercurial/revlog.py	Wed Oct 03 17:17:27 2007 -0500
+++ b/mercurial/revlog.py	Wed Oct 03 17:17:28 2007 -0500
@@ -1094,7 +1094,9 @@
                 meta += mdiff.trivialdiffheader(len(d))
             else:
                 d = self.revdiff(a, b)
-            yield changegroup.genchunk("%s%s" % (meta, d))
+            yield changegroup.chunkheader(len(meta) + len(d))
+            yield meta
+            yield d
 
         yield changegroup.closechunk()