1 # Copyright 2011, Google Inc. |
|
2 # All rights reserved. |
|
3 # |
|
4 # Redistribution and use in source and binary forms, with or without |
|
5 # modification, are permitted provided that the following conditions are |
|
6 # met: |
|
7 # |
|
8 # * Redistributions of source code must retain the above copyright |
|
9 # notice, this list of conditions and the following disclaimer. |
|
10 # * Redistributions in binary form must reproduce the above |
|
11 # copyright notice, this list of conditions and the following disclaimer |
|
12 # in the documentation and/or other materials provided with the |
|
13 # distribution. |
|
14 # * Neither the name of Google Inc. nor the names of its |
|
15 # contributors may be used to endorse or promote products derived from |
|
16 # this software without specific prior written permission. |
|
17 |
|
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 """Reader objects to abstract out different body response types. |
|
30 |
|
31 This module is package-private. It is not expected that these will |
|
32 have any clients outside of httpplus. |
|
33 """ |
|
34 from __future__ import absolute_import |
|
35 |
|
36 try: |
|
37 import httplib |
|
38 httplib.HTTPException |
|
39 except ImportError: |
|
40 import http.client as httplib |
|
41 |
|
42 import logging |
|
43 |
|
44 logger = logging.getLogger(__name__) |
|
45 |
|
46 |
|
class ReadNotReady(Exception):
    """Signals that read() was called before enough data was buffered.

    Raised by a reader's read() when fewer bytes than requested are
    available and the response body has not finished loading yet.
    """
|
49 |
|
50 |
|
class HTTPRemoteClosedError(httplib.HTTPException):
    """The remote end closed the socket before the response was complete.

    Derives from httplib.HTTPException so callers that already handle
    generic HTTP errors also catch this one.
    """
|
53 |
|
54 |
|
class AbstractReader(object):
    """Abstract base class for response readers.

    Decoded body data is kept as a FIFO queue of chunks in
    ``_done_chunks``; ``available_data`` is maintained as the total
    length of the chunks currently queued.

    Subclasses must implement _load, and should implement _close if
    it's not an error for the server to close their socket without
    some termination condition being detected during _load.
    """
    def __init__(self):
        self._finished = False
        self._done_chunks = []
        self.available_data = 0

    def _addchunk(self, data):
        """Append data to the tail of the queue of readable chunks."""
        self._done_chunks.append(data)
        self.available_data += len(data)

    def _pushchunk(self, data):
        """Put data back at the head of the queue (it is read next)."""
        self._done_chunks.insert(0, data)
        self.available_data += len(data)

    def _popchunk(self):
        """Remove and return the chunk at the head of the queue."""
        b = self._done_chunks.pop(0)
        self.available_data -= len(b)

        return b

    def done(self):
        """Returns true if the response body is entirely read."""
        return self._finished

    def read(self, amt):
        """Read amt bytes from the response body.

        Returns exactly amt bytes, or fewer if the response is finished
        and less than amt bytes remain buffered.

        Raises ReadNotReady if fewer than amt bytes are buffered and
        the response has not finished loading.
        """
        if self.available_data < amt and not self._finished:
            raise ReadNotReady()
        blocks = []
        need = amt
        while self._done_chunks:
            b = self._popchunk()
            if len(b) > need:
                # Only part of this chunk is wanted: keep the head and
                # put the tail back for the next reader call.
                nb = b[:need]
                self._pushchunk(b[need:])
                b = nb
            blocks.append(b)
            need -= len(b)
            if need == 0:
                break
        result = b''.join(blocks)
        assert len(result) == amt or (self._finished and len(result) < amt)

        return result

    def readto(self, delimstr, blocks=None):
        """return available data chunks up to the first one in which
        delimstr occurs. No data will be returned after delimstr --
        the chunk in which it occurs will be split and the remainder
        pushed back onto the available data queue. If blocks is
        supplied chunks will be added to blocks, otherwise a new list
        will be allocated.

        Each chunk is searched on its own, so a delimiter that straddles
        two chunks is not detected. If delimstr never occurs (or is
        empty), every available chunk is consumed and returned.
        """
        if blocks is None:
            blocks = []

        while self._done_chunks:
            b = self._popchunk()
            # Test the find() result itself: adding len(delimstr) to a
            # -1 "not found" yields a truthy offset for any delimiter
            # longer than one byte. An empty delimiter never matches.
            found = b.find(delimstr) if delimstr else -1
            if found == -1:
                blocks.append(b)
                continue
            end = found + len(delimstr)
            if end < len(b):
                # Data after the delimiter stays queued for later reads.
                self._pushchunk(b[end:])
            blocks.append(b[:end])
            break

        return blocks

    def _load(self, data):  # pragma: no cover
        """Subclasses must implement this.

        As data is available to be read out of this object, it should
        be placed into the _done_chunks list. Subclasses should not
        rely on data remaining in _done_chunks forever, as it may be
        reaped if the client is parsing data as it comes in.
        """
        raise NotImplementedError

    def _close(self):
        """Default implementation of close.

        The default implementation assumes that the reader will mark
        the response as finished on the _finished attribute once the
        entire response body has been read. In the event that this is
        not true, the subclass should override the implementation of
        close (for example, close-is-end responses have to set
        self._finished in the close handler.)

        Raises HTTPRemoteClosedError if the response is not finished.
        """
        if not self._finished:
            raise HTTPRemoteClosedError(
                'server appears to have closed the socket mid-response')
|
153 |
|
154 |
|
class AbstractSimpleReader(AbstractReader):
    """Base class for readers whose body bytes need no decoding.

    Incoming data is queued exactly as received. This covers responses
    delimited by connection close (close-is-end) and responses that
    declare a content length. Concrete subclasses provide the ``name``
    attribute used in log output.
    """
    def _load(self, data):
        """Queue data verbatim; empty data is ignored."""
        if not data:
            return
        assert not self._finished, (
            'tried to add data (%r) to a closed reader!' % data)
        received = len(data)
        logger.debug('%s read an additional %d data',
                     self.name, received)  # pylint: disable=E1101
        self._addchunk(data)
|
168 |
|
169 |
|
class CloseIsEndReader(AbstractSimpleReader):
    """Reader for bodies whose end is signalled by the connection closing."""
    name = 'close-is-end'

    def _close(self):
        """Treat the socket closing as the normal end of the body."""
        logger.info('Marking close-is-end reader as closed.')
        self._finished = True
|
177 |
|
178 |
|
class ContentLengthReader(AbstractSimpleReader):
    """Reader for bodies whose exact size is declared up front."""
    name = 'content-length'

    def __init__(self, amount):
        """Create a reader expecting amount bytes of body data."""
        AbstractSimpleReader.__init__(self)
        self._amount_seen = 0
        self._amount = amount
        if amount == 0:
            # An empty body is complete before any data is loaded.
            self._finished = True

    def _load(self, data):
        """Queue data and mark the reader finished once enough was seen."""
        AbstractSimpleReader._load(self, data)
        seen = self._amount_seen + len(data)
        self._amount_seen = seen
        if seen >= self._amount:
            self._finished = True
            logger.debug('content-length read complete')
|
196 |
|
197 |
|
class ChunkedReader(AbstractReader):
    """Reader for chunked transfer encoding responses.

    Each chunk on the wire is a hexadecimal size line, optionally
    followed by ``;``-introduced chunk extensions (which are ignored),
    the end-of-line marker, the chunk payload and a trailing
    end-of-line marker. A chunk of size zero ends the body.
    """
    def __init__(self, eol):
        """Create a reader using eol as the line terminator."""
        AbstractReader.__init__(self)
        self._eol = eol
        # Bytes of an incomplete chunk carried over to the next _load,
        # and the offset within them at which parsing must resume.
        self._leftover_skip_amt = 0
        self._leftover_data = b''

    def _load(self, data):
        """Decode as many complete chunks from data as possible.

        Payloads of complete chunks are queued for reading. If data
        ends in the middle of a chunk, the unparsed bytes are kept and
        parsing resumes on the next call.

        Raises ValueError if a chunk size is not valid hexadecimal or
        is negative.
        """
        assert not self._finished, 'tried to add data to a closed reader!'
        logger.debug('chunked read an additional %d data', len(data))
        position = 0
        if self._leftover_data:
            logger.debug(
                'chunked reader trying to finish block from leftover data')
            # TODO: avoid this string concatenation if possible
            data = self._leftover_data + data
            position = self._leftover_skip_amt
            self._leftover_data = b''
            self._leftover_skip_amt = 0
        eol = self._eol
        eol_len = len(eol)
        # Use a separator of the same string type as the eol marker so
        # the size line can be split regardless of bytes/text input.
        ext_sep = b';' if isinstance(eol, bytes) else u';'
        datalen = len(data)
        while position < datalen:
            split = data.find(eol, position)
            if split == -1:
                # The size line itself is incomplete; wait for more data.
                self._leftover_data = data
                self._leftover_skip_amt = position
                return
            # Chunk extensions after the size must be ignored.
            size_field = data[position:split].split(ext_sep, 1)[0]
            amt = int(size_field, base=16)
            if amt < 0:
                # A negative size would slice with negative indexes and
                # move the parse position backwards.
                raise ValueError(
                    'invalid negative chunk size: %r' % (size_field,))
            block_start = split + eol_len
            # If the whole data chunk plus the eol trailer hasn't
            # loaded, we'll wait for the next load.
            if block_start + amt + eol_len > datalen:
                self._leftover_data = data
                self._leftover_skip_amt = position
                return
            if amt == 0:
                self._finished = True
                logger.debug('closing chunked reader due to chunk of length 0')
                return
            self._addchunk(data[block_start:block_start + amt])
            position = block_start + amt + eol_len
|
239 # no-check-code |
|