keepalive: don't concatenate strings when reading chunked transfer
author Gregory Szorc <gregory.szorc@gmail.com>
Wed, 07 Oct 2015 15:33:52 -0700
changeset 30686 8352c42a0a0d
parent 30685 95325386cd1a
child 30687 5d06f6b73a57
keepalive: don't concatenate strings when reading chunked transfer

Surprisingly, this didn't appear to speed up HTTP-based stream cloning on my machine. I suspect this has more to do with the fact that we're using small HTTP chunks and string concatenation overhead isn't so bad. However, the reasons for this change are solid: we know string concatenation can be a performance sink.
mercurial/keepalive.py
--- a/mercurial/keepalive.py	Mon Dec 26 12:11:29 2016 -0700
+++ b/mercurial/keepalive.py	Wed Oct 07 15:33:52 2015 -0700
@@ -399,10 +399,8 @@
     # stolen from Python SVN #68532 to fix issue1088
     def _read_chunked(self, amt):
         chunk_left = self.chunk_left
-        value = ''
+        parts = []
 
-        # XXX This accumulates chunks by repeated string concatenation,
-        # which is not efficient as the number or size of chunks gets big.
         while True:
             if chunk_left is None:
                 line = self.fp.readline()
@@ -415,22 +413,22 @@
                     # close the connection as protocol synchronization is
                     # probably lost
                     self.close()
-                    raise httplib.IncompleteRead(value)
+                    raise httplib.IncompleteRead(''.join(parts))
                 if chunk_left == 0:
                     break
             if amt is None:
-                value += self._safe_read(chunk_left)
+                parts.append(self._safe_read(chunk_left))
             elif amt < chunk_left:
-                value += self._safe_read(amt)
+                parts.append(self._safe_read(amt))
                 self.chunk_left = chunk_left - amt
-                return value
+                return ''.join(parts)
             elif amt == chunk_left:
-                value += self._safe_read(amt)
+                parts.append(self._safe_read(amt))
                 self._safe_read(2)  # toss the CRLF at the end of the chunk
                 self.chunk_left = None
-                return value
+                return ''.join(parts)
             else:
-                value += self._safe_read(chunk_left)
+                parts.append(self._safe_read(chunk_left))
                 amt -= chunk_left
 
             # we read the whole chunk, get another
@@ -451,7 +449,7 @@
         # we read everything; close the "file"
         self.close()
 
-        return value
+        return ''.join(parts)
 
     def readline(self, limit=-1):
         i = self._rbuf.find('\n')