mercurial/httpclient/_readers.py
changeset 36456 23d12524a202
parent 36455 24c2c760c1cb
child 36457 247b473f408e
equal deleted inserted replaced
36455:24c2c760c1cb 36456:23d12524a202
     1 # Copyright 2011, Google Inc.
       
     2 # All rights reserved.
       
     3 #
       
     4 # Redistribution and use in source and binary forms, with or without
       
     5 # modification, are permitted provided that the following conditions are
       
     6 # met:
       
     7 #
       
     8 #     * Redistributions of source code must retain the above copyright
       
     9 # notice, this list of conditions and the following disclaimer.
       
    10 #     * Redistributions in binary form must reproduce the above
       
    11 # copyright notice, this list of conditions and the following disclaimer
       
    12 # in the documentation and/or other materials provided with the
       
    13 # distribution.
       
    14 #     * Neither the name of Google Inc. nor the names of its
       
    15 # contributors may be used to endorse or promote products derived from
       
    16 # this software without specific prior written permission.
       
    17 
       
    18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       
    19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       
    20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       
    21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
       
    22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       
    23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       
    24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    29 """Reader objects to abstract out different body response types.
       
    30 
       
    31 This module is package-private. It is not expected that these will
       
    32 have any clients outside of httpplus.
       
    33 """
       
    34 from __future__ import absolute_import
       
    35 
       
    36 try:
       
    37     import httplib
       
    38     httplib.HTTPException
       
    39 except ImportError:
       
    40     import http.client as httplib
       
    41 
       
    42 import logging
       
    43 
       
    44 logger = logging.getLogger(__name__)
       
    45 
       
    46 
       
    47 class ReadNotReady(Exception):
       
    48     """Raised when read() is attempted but not enough data is loaded."""
       
    49 
       
    50 
       
    51 class HTTPRemoteClosedError(httplib.HTTPException):
       
    52     """The server closed the remote socket in the middle of a response."""
       
    53 
       
    54 
       
    55 class AbstractReader(object):
       
    56     """Abstract base class for response readers.
       
    57 
       
    58     Subclasses must implement _load, and should implement _close if
       
    59     it's not an error for the server to close their socket without
       
    60     some termination condition being detected during _load.
       
    61     """
       
    62     def __init__(self):
       
    63         self._finished = False
       
    64         self._done_chunks = []
       
    65         self.available_data = 0
       
    66 
       
    67     def _addchunk(self, data):
       
    68         self._done_chunks.append(data)
       
    69         self.available_data += len(data)
       
    70 
       
    71     def _pushchunk(self, data):
       
    72         self._done_chunks.insert(0, data)
       
    73         self.available_data += len(data)
       
    74 
       
    75     def _popchunk(self):
       
    76         b = self._done_chunks.pop(0)
       
    77         self.available_data -= len(b)
       
    78 
       
    79         return b
       
    80 
       
    81     def done(self):
       
    82         """Returns true if the response body is entirely read."""
       
    83         return self._finished
       
    84 
       
    85     def read(self, amt):
       
    86         """Read amt bytes from the response body."""
       
    87         if self.available_data < amt and not self._finished:
       
    88             raise ReadNotReady()
       
    89         blocks = []
       
    90         need = amt
       
    91         while self._done_chunks:
       
    92             b = self._popchunk()
       
    93             if len(b) > need:
       
    94                 nb = b[:need]
       
    95                 self._pushchunk(b[need:])
       
    96                 b = nb
       
    97             blocks.append(b)
       
    98             need -= len(b)
       
    99             if need == 0:
       
   100                 break
       
   101         result = b''.join(blocks)
       
   102         assert len(result) == amt or (self._finished and len(result) < amt)
       
   103 
       
   104         return result
       
   105 
       
   106     def readto(self, delimstr, blocks = None):
       
   107         """return available data chunks up to the first one in which
       
   108         delimstr occurs. No data will be returned after delimstr --
       
   109         the chunk in which it occurs will be split and the remainder
       
   110         pushed back onto the available data queue. If blocks is
       
   111         supplied chunks will be added to blocks, otherwise a new list
       
   112         will be allocated.
       
   113         """
       
   114         if blocks is None:
       
   115             blocks = []
       
   116 
       
   117         while self._done_chunks:
       
   118             b = self._popchunk()
       
   119             i = b.find(delimstr) + len(delimstr)
       
   120             if i:
       
   121                 if i < len(b):
       
   122                     self._pushchunk(b[i:])
       
   123                 blocks.append(b[:i])
       
   124                 break
       
   125             else:
       
   126                 blocks.append(b)
       
   127 
       
   128         return blocks
       
   129 
       
   130     def _load(self, data): # pragma: no cover
       
   131         """Subclasses must implement this.
       
   132 
       
   133         As data is available to be read out of this object, it should
       
   134         be placed into the _done_chunks list. Subclasses should not
       
   135         rely on data remaining in _done_chunks forever, as it may be
       
   136         reaped if the client is parsing data as it comes in.
       
   137         """
       
   138         raise NotImplementedError
       
   139 
       
   140     def _close(self):
       
   141         """Default implementation of close.
       
   142 
       
   143         The default implementation assumes that the reader will mark
       
   144         the response as finished on the _finished attribute once the
       
   145         entire response body has been read. In the event that this is
       
   146         not true, the subclass should override the implementation of
       
   147         close (for example, close-is-end responses have to set
       
   148         self._finished in the close handler.)
       
   149         """
       
   150         if not self._finished:
       
   151             raise HTTPRemoteClosedError(
       
   152                 'server appears to have closed the socket mid-response')
       
   153 
       
   154 
       
   155 class AbstractSimpleReader(AbstractReader):
       
   156     """Abstract base class for simple readers that require no response decoding.
       
   157 
       
   158     Examples of such responses are Connection: Close (close-is-end)
       
   159     and responses that specify a content length.
       
   160     """
       
   161     def _load(self, data):
       
   162         if data:
       
   163             assert not self._finished, (
       
   164                 'tried to add data (%r) to a closed reader!' % data)
       
   165         logger.debug('%s read an additional %d data',
       
   166                      self.name, len(data)) # pylint: disable=E1101
       
   167         self._addchunk(data)
       
   168 
       
   169 
       
   170 class CloseIsEndReader(AbstractSimpleReader):
       
   171     """Reader for responses that specify Connection: Close for length."""
       
   172     name = 'close-is-end'
       
   173 
       
   174     def _close(self):
       
   175         logger.info('Marking close-is-end reader as closed.')
       
   176         self._finished = True
       
   177 
       
   178 
       
   179 class ContentLengthReader(AbstractSimpleReader):
       
   180     """Reader for responses that specify an exact content length."""
       
   181     name = 'content-length'
       
   182 
       
   183     def __init__(self, amount):
       
   184         AbstractSimpleReader.__init__(self)
       
   185         self._amount = amount
       
   186         if amount == 0:
       
   187             self._finished = True
       
   188         self._amount_seen = 0
       
   189 
       
   190     def _load(self, data):
       
   191         AbstractSimpleReader._load(self, data)
       
   192         self._amount_seen += len(data)
       
   193         if self._amount_seen >= self._amount:
       
   194             self._finished = True
       
   195             logger.debug('content-length read complete')
       
   196 
       
   197 
       
   198 class ChunkedReader(AbstractReader):
       
   199     """Reader for chunked transfer encoding responses."""
       
   200     def __init__(self, eol):
       
   201         AbstractReader.__init__(self)
       
   202         self._eol = eol
       
   203         self._leftover_skip_amt = 0
       
   204         self._leftover_data = ''
       
   205 
       
   206     def _load(self, data):
       
   207         assert not self._finished, 'tried to add data to a closed reader!'
       
   208         logger.debug('chunked read an additional %d data', len(data))
       
   209         position = 0
       
   210         if self._leftover_data:
       
   211             logger.debug(
       
   212                 'chunked reader trying to finish block from leftover data')
       
   213             # TODO: avoid this string concatenation if possible
       
   214             data = self._leftover_data + data
       
   215             position = self._leftover_skip_amt
       
   216             self._leftover_data = ''
       
   217             self._leftover_skip_amt = 0
       
   218         datalen = len(data)
       
   219         while position < datalen:
       
   220             split = data.find(self._eol, position)
       
   221             if split == -1:
       
   222                 self._leftover_data = data
       
   223                 self._leftover_skip_amt = position
       
   224                 return
       
   225             amt = int(data[position:split], base=16)
       
   226             block_start = split + len(self._eol)
       
   227             # If the whole data chunk plus the eol trailer hasn't
       
   228             # loaded, we'll wait for the next load.
       
   229             if block_start + amt + len(self._eol) > len(data):
       
   230                 self._leftover_data = data
       
   231                 self._leftover_skip_amt = position
       
   232                 return
       
   233             if amt == 0:
       
   234                 self._finished = True
       
   235                 logger.debug('closing chunked reader due to chunk of length 0')
       
   236                 return
       
   237             self._addchunk(data[block_start:block_start + amt])
       
   238             position = block_start + amt + len(self._eol)
       
   239 # no-check-code