http: reuse Python's implementation of read/readline/readinto
author Joerg Sonnenberger <joerg@bec.de>
Sun, 30 Jun 2024 13:22:23 +0200
changeset 51871 c1ed5ee2ad82
parent 51870 b08de326bee4
child 51872 208698117124
http: reuse Python's implementation of read/readline/readinto

Since Python 3 already provides a working implementation of readline, there is no need for our own buffering implementation. Reduce the code to transfer accounting only.
mercurial/keepalive.py
--- a/mercurial/keepalive.py	Sun Jun 30 02:46:53 2024 +0200
+++ b/mercurial/keepalive.py	Sun Jun 30 13:22:23 2024 +0200
@@ -380,22 +380,9 @@
 
 class HTTPResponse(httplib.HTTPResponse):
     # we need to subclass HTTPResponse in order to
-    # 1) add readline(), readlines(), and readinto() methods
-    # 2) add close_connection() methods
-    # 3) add info() and geturl() methods
-
-    # in order to add readline(), read must be modified to deal with a
-    # buffer.  example: readline must read a buffer and then spit back
-    # one line at a time.  The only real alternative is to read one
-    # BYTE at a time (ick).  Once something has been read, it can't be
-    # put back (ok, maybe it can, but that's even uglier than this),
-    # so if you THEN do a normal read, you must first take stuff from
-    # the buffer.
-
-    # the read method wraps the original to accommodate buffering,
-    # although read() never adds to the buffer.
-    # Both readline and readlines have been stolen with almost no
-    # modification from socket.py
+    # 1) add close_connection() methods
+    # 2) add info() and geturl() methods
+    # 3) add accounting for read(), readline() and readinto()
 
     def __init__(self, sock, debuglevel=0, strict=0, method=None):
         httplib.HTTPResponse.__init__(
@@ -411,9 +398,6 @@
         self._url = None  # (same)
         self._connection = None  # (same)
 
-    _raw_read = httplib.HTTPResponse.read
-    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
-
     # Python 2.7 has a single close() which closes the socket handle.
     # This method was effectively renamed to _close_conn() in Python 3. But
     # there is also a close(). _close_conn() is called by methods like
@@ -442,23 +426,7 @@
         return self._url
 
     def read(self, amt=None):
-        # the _rbuf test is only in this first if for speed.  It's not
-        # logically necessary
-        if self._rbuf and amt is not None:
-            L = len(self._rbuf)
-            if amt > L:
-                amt -= L
-            else:
-                s = self._rbuf[:amt]
-                self._rbuf = self._rbuf[amt:]
-                return s
-        # Careful! http.client.HTTPResponse.read() on Python 3 is
-        # implemented using readinto(), which can duplicate self._rbuf
-        # if it's not empty.
-        s = self._rbuf
-        self._rbuf = b''
-        data = self._raw_read(amt)
-
+        data = super().read(amt)
         self.receivedbytescount += len(data)
         try:
             self._connection.receivedbytescount += len(data)
@@ -468,137 +436,32 @@
             self._handler.parent.receivedbytescount += len(data)
         except AttributeError:
             pass
-
-        s += data
-        return s
-
-    # stolen from Python SVN #68532 to fix issue1088
-    def _read_chunked(self, amt):
-        chunk_left = self.chunk_left
-        parts = []
-
-        while True:
-            if chunk_left is None:
-                line = self.fp.readline()
-                i = line.find(b';')
-                if i >= 0:
-                    line = line[:i]  # strip chunk-extensions
-                try:
-                    chunk_left = int(line, 16)
-                except ValueError:
-                    # close the connection as protocol synchronization is
-                    # probably lost
-                    self.close()
-                    raise httplib.IncompleteRead(b''.join(parts))
-                if chunk_left == 0:
-                    break
-            if amt is None:
-                parts.append(self._safe_read(chunk_left))
-            elif amt < chunk_left:
-                parts.append(self._safe_read(amt))
-                self.chunk_left = chunk_left - amt
-                return b''.join(parts)
-            elif amt == chunk_left:
-                parts.append(self._safe_read(amt))
-                self._safe_read(2)  # toss the CRLF at the end of the chunk
-                self.chunk_left = None
-                return b''.join(parts)
-            else:
-                parts.append(self._safe_read(chunk_left))
-                amt -= chunk_left
-
-            # we read the whole chunk, get another
-            self._safe_read(2)  # toss the CRLF at the end of the chunk
-            chunk_left = None
-
-        # read and discard trailer up to the CRLF terminator
-        ### note: we shouldn't have any trailers!
-        while True:
-            line = self.fp.readline()
-            if not line:
-                # a vanishingly small number of sites EOF without
-                # sending the trailer
-                break
-            if line == b'\r\n':
-                break
-
-        # we read everything; close the "file"
-        self.close()
-
-        return b''.join(parts)
+        return data
 
     def readline(self):
-        # Fast path for a line is already available in read buffer.
-        i = self._rbuf.find(b'\n')
-        if i >= 0:
-            i += 1
-            line = self._rbuf[:i]
-            self._rbuf = self._rbuf[i:]
-            return line
-
-        # No newline in local buffer. Read until we find one.
-        # readinto read via readinto will already return _rbuf
-        if self._raw_readinto is None:
-            chunks = [self._rbuf]
-        else:
-            chunks = []
-        i = -1
-        readsize = self._rbufsize
-        while True:
-            new = self._raw_read(readsize)
-            if not new:
-                break
-
-            self.receivedbytescount += len(new)
-            self._connection.receivedbytescount += len(new)
-            try:
-                self._handler.parent.receivedbytescount += len(new)
-            except AttributeError:
-                pass
-
-            chunks.append(new)
-            i = new.find(b'\n')
-            if i >= 0:
-                break
-
-        # We either have exhausted the stream or have a newline in chunks[-1].
-
-        # EOF
-        if i == -1:
-            self._rbuf = b''
-            return b''.join(chunks)
-
-        i += 1
-        self._rbuf = chunks[-1][i:]
-        chunks[-1] = chunks[-1][:i]
-        return b''.join(chunks)
+        data = super().readline()
+        self.receivedbytescount += len(data)
+        try:
+            self._connection.receivedbytescount += len(data)
+        except AttributeError:
+            pass
+        try:
+            self._handler.parent.receivedbytescount += len(data)
+        except AttributeError:
+            pass
+        return data
 
     def readinto(self, dest):
-        if self._raw_readinto is None:
-            res = self.read(len(dest))
-            if not res:
-                return 0
-            dest[0 : len(res)] = res
-            return len(res)
-        total = len(dest)
-        have = len(self._rbuf)
-        if have >= total:
-            dest[0:total] = self._rbuf[:total]
-            self._rbuf = self._rbuf[total:]
-            return total
-        mv = memoryview(dest)
-        got = self._raw_readinto(mv[have:total])
-
+        got = super().readinto(dest)
         self.receivedbytescount += got
-        self._connection.receivedbytescount += got
         try:
-            self._handler.receivedbytescount += got
+            self._connection.receivedbytescount += got
         except AttributeError:
             pass
-
-        dest[0:have] = self._rbuf
-        got += len(self._rbuf)
-        self._rbuf = b''
+        try:
+            self._handler.parent.receivedbytescount += got
+        except AttributeError:
+            pass
         return got