http: reuse Python's implementation of read/readline/readinto
Since Python 3 already provides a working implementation of readline,
there is no need for our own buffering implementation. Reduce the
code to transfer accounting only.
--- a/mercurial/keepalive.py Sun Jun 30 02:46:53 2024 +0200
+++ b/mercurial/keepalive.py Sun Jun 30 13:22:23 2024 +0200
@@ -380,22 +380,9 @@
class HTTPResponse(httplib.HTTPResponse):
# we need to subclass HTTPResponse in order to
- # 1) add readline(), readlines(), and readinto() methods
- # 2) add close_connection() methods
- # 3) add info() and geturl() methods
-
- # in order to add readline(), read must be modified to deal with a
- # buffer. example: readline must read a buffer and then spit back
- # one line at a time. The only real alternative is to read one
- # BYTE at a time (ick). Once something has been read, it can't be
- # put back (ok, maybe it can, but that's even uglier than this),
- # so if you THEN do a normal read, you must first take stuff from
- # the buffer.
-
- # the read method wraps the original to accommodate buffering,
- # although read() never adds to the buffer.
- # Both readline and readlines have been stolen with almost no
- # modification from socket.py
+ # 1) add close_connection() methods
+ # 2) add info() and geturl() methods
+ # 3) add accounting for read(), readlines() and readinto()
def __init__(self, sock, debuglevel=0, strict=0, method=None):
httplib.HTTPResponse.__init__(
@@ -411,9 +398,6 @@
self._url = None # (same)
self._connection = None # (same)
- _raw_read = httplib.HTTPResponse.read
- _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
-
# Python 2.7 has a single close() which closes the socket handle.
# This method was effectively renamed to _close_conn() in Python 3. But
# there is also a close(). _close_conn() is called by methods like
@@ -442,23 +426,7 @@
return self._url
def read(self, amt=None):
- # the _rbuf test is only in this first if for speed. It's not
- # logically necessary
- if self._rbuf and amt is not None:
- L = len(self._rbuf)
- if amt > L:
- amt -= L
- else:
- s = self._rbuf[:amt]
- self._rbuf = self._rbuf[amt:]
- return s
- # Careful! http.client.HTTPResponse.read() on Python 3 is
- # implemented using readinto(), which can duplicate self._rbuf
- # if it's not empty.
- s = self._rbuf
- self._rbuf = b''
- data = self._raw_read(amt)
-
+ data = super().read(amt)
self.receivedbytescount += len(data)
try:
self._connection.receivedbytescount += len(data)
@@ -468,137 +436,32 @@
self._handler.parent.receivedbytescount += len(data)
except AttributeError:
pass
-
- s += data
- return s
-
- # stolen from Python SVN #68532 to fix issue1088
- def _read_chunked(self, amt):
- chunk_left = self.chunk_left
- parts = []
-
- while True:
- if chunk_left is None:
- line = self.fp.readline()
- i = line.find(b';')
- if i >= 0:
- line = line[:i] # strip chunk-extensions
- try:
- chunk_left = int(line, 16)
- except ValueError:
- # close the connection as protocol synchronization is
- # probably lost
- self.close()
- raise httplib.IncompleteRead(b''.join(parts))
- if chunk_left == 0:
- break
- if amt is None:
- parts.append(self._safe_read(chunk_left))
- elif amt < chunk_left:
- parts.append(self._safe_read(amt))
- self.chunk_left = chunk_left - amt
- return b''.join(parts)
- elif amt == chunk_left:
- parts.append(self._safe_read(amt))
- self._safe_read(2) # toss the CRLF at the end of the chunk
- self.chunk_left = None
- return b''.join(parts)
- else:
- parts.append(self._safe_read(chunk_left))
- amt -= chunk_left
-
- # we read the whole chunk, get another
- self._safe_read(2) # toss the CRLF at the end of the chunk
- chunk_left = None
-
- # read and discard trailer up to the CRLF terminator
- ### note: we shouldn't have any trailers!
- while True:
- line = self.fp.readline()
- if not line:
- # a vanishingly small number of sites EOF without
- # sending the trailer
- break
- if line == b'\r\n':
- break
-
- # we read everything; close the "file"
- self.close()
-
- return b''.join(parts)
+ return data
def readline(self):
- # Fast path for a line is already available in read buffer.
- i = self._rbuf.find(b'\n')
- if i >= 0:
- i += 1
- line = self._rbuf[:i]
- self._rbuf = self._rbuf[i:]
- return line
-
- # No newline in local buffer. Read until we find one.
- # readinto read via readinto will already return _rbuf
- if self._raw_readinto is None:
- chunks = [self._rbuf]
- else:
- chunks = []
- i = -1
- readsize = self._rbufsize
- while True:
- new = self._raw_read(readsize)
- if not new:
- break
-
- self.receivedbytescount += len(new)
- self._connection.receivedbytescount += len(new)
- try:
- self._handler.parent.receivedbytescount += len(new)
- except AttributeError:
- pass
-
- chunks.append(new)
- i = new.find(b'\n')
- if i >= 0:
- break
-
- # We either have exhausted the stream or have a newline in chunks[-1].
-
- # EOF
- if i == -1:
- self._rbuf = b''
- return b''.join(chunks)
-
- i += 1
- self._rbuf = chunks[-1][i:]
- chunks[-1] = chunks[-1][:i]
- return b''.join(chunks)
+ data = super().readline()
+ self.receivedbytescount += len(data)
+ try:
+ self._connection.receivedbytescount += len(data)
+ except AttributeError:
+ pass
+ try:
+ self._handler.parent.receivedbytescount += len(data)
+ except AttributeError:
+ pass
+ return data
def readinto(self, dest):
- if self._raw_readinto is None:
- res = self.read(len(dest))
- if not res:
- return 0
- dest[0 : len(res)] = res
- return len(res)
- total = len(dest)
- have = len(self._rbuf)
- if have >= total:
- dest[0:total] = self._rbuf[:total]
- self._rbuf = self._rbuf[total:]
- return total
- mv = memoryview(dest)
- got = self._raw_readinto(mv[have:total])
-
+ got = super().readinto(dest)
self.receivedbytescount += got
- self._connection.receivedbytescount += got
try:
- self._handler.receivedbytescount += got
+ self._connection.receivedbytescount += got
except AttributeError:
pass
-
- dest[0:have] = self._rbuf
- got += len(self._rbuf)
- self._rbuf = b''
+ try:
+ self._handler.parent.receivedbytescount += got
+ except AttributeError:
+ pass
return got