keepalive: don't concatenate strings when reading chunked transfer
Surprisingly, this didn't appear to speed up HTTP-based stream cloning
on my machine. I suspect that is because we're using small HTTP chunks,
so the string concatenation overhead isn't that bad in practice.
However, the reasons for this change are solid: we know string
concatenation can be a performance sink.
--- a/mercurial/keepalive.py Mon Dec 26 12:11:29 2016 -0700
+++ b/mercurial/keepalive.py Wed Oct 07 15:33:52 2015 -0700
@@ -399,10 +399,8 @@
# stolen from Python SVN #68532 to fix issue1088
def _read_chunked(self, amt):
chunk_left = self.chunk_left
- value = ''
+ parts = []
- # XXX This accumulates chunks by repeated string concatenation,
- # which is not efficient as the number or size of chunks gets big.
while True:
if chunk_left is None:
line = self.fp.readline()
@@ -415,22 +413,22 @@
# close the connection as protocol synchronization is
# probably lost
self.close()
- raise httplib.IncompleteRead(value)
+ raise httplib.IncompleteRead(''.join(parts))
if chunk_left == 0:
break
if amt is None:
- value += self._safe_read(chunk_left)
+ parts.append(self._safe_read(chunk_left))
elif amt < chunk_left:
- value += self._safe_read(amt)
+ parts.append(self._safe_read(amt))
self.chunk_left = chunk_left - amt
- return value
+ return ''.join(parts)
elif amt == chunk_left:
- value += self._safe_read(amt)
+ parts.append(self._safe_read(amt))
self._safe_read(2) # toss the CRLF at the end of the chunk
self.chunk_left = None
- return value
+ return ''.join(parts)
else:
- value += self._safe_read(chunk_left)
+ parts.append(self._safe_read(chunk_left))
amt -= chunk_left
# we read the whole chunk, get another
@@ -451,7 +449,7 @@
# we read everything; close the "file"
self.close()
- return value
+ return ''.join(parts)
def readline(self, limit=-1):
i = self._rbuf.find('\n')