changegroup: avoid large copies
- handle chunk headers separately rather than prepending them to
(potentially large) chunks
- break large chunks into 1M (2**20-byte) pieces for compression
- don't prepend file metadata onto (potentially large) file data
--- a/mercurial/changegroup.py Wed Oct 03 17:17:27 2007 -0500
+++ b/mercurial/changegroup.py Wed Oct 03 17:17:28 2007 -0500
@@ -33,10 +33,9 @@
break
yield c
-def genchunk(data):
- """build a changegroup chunk"""
- header = struct.pack(">l", len(data)+ 4)
- return "%s%s" % (header, data)
+def chunkheader(length):
+ """build a changegroup chunk header"""
+ return struct.pack(">l", length + 4)
def closechunk():
return struct.pack(">l", 0)
@@ -86,7 +85,12 @@
empty = True
for chunk in chunkiter(cg):
empty = False
- fh.write(z.compress(genchunk(chunk)))
+ fh.write(z.compress(chunkheader(len(chunk))))
+ pos = 0
+ while pos < len(chunk):
+ next = pos + 2**20
+ fh.write(z.compress(chunk[pos:next]))
+ pos = next
fh.write(z.compress(closechunk()))
fh.write(z.flush())
cleanup = None
--- a/mercurial/localrepo.py Wed Oct 03 17:17:27 2007 -0500
+++ b/mercurial/localrepo.py Wed Oct 03 17:17:28 2007 -0500
@@ -1720,7 +1720,8 @@
# If any filenodes are left, generate the group for them,
# otherwise don't bother.
if len(msng_filenode_lst) > 0:
- yield changegroup.genchunk(fname)
+ yield changegroup.chunkheader(len(fname))
+ yield fname
# Sort the filenodes by their revision #
msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
# Create a group generator and only pass in a changenode
@@ -1796,7 +1797,8 @@
nodeiter = gennodelst(filerevlog)
nodeiter = list(nodeiter)
if nodeiter:
- yield changegroup.genchunk(fname)
+ yield changegroup.chunkheader(len(fname))
+ yield fname
lookup = lookuprevlink_func(filerevlog)
for chnk in filerevlog.group(nodeiter, lookup):
yield chnk
--- a/mercurial/revlog.py Wed Oct 03 17:17:27 2007 -0500
+++ b/mercurial/revlog.py Wed Oct 03 17:17:28 2007 -0500
@@ -1094,7 +1094,9 @@
meta += mdiff.trivialdiffheader(len(d))
else:
d = self.revdiff(a, b)
- yield changegroup.genchunk("%s%s" % (meta, d))
+ yield changegroup.chunkheader(len(meta) + len(d))
+ yield meta
+ yield d
yield changegroup.closechunk()