comparison mercurial/httpclient/__init__.py @ 16643:24dbef11f477

httpclient: update to revision 892730fe7f46 of httpplus
author Augie Fackler <raf@durin42.com>
date Fri, 04 May 2012 16:00:33 -0500
parents c81dce8a7bb6
children 69af967b6d6f
comparison
equal deleted inserted replaced
16642:5cf18921bb7b 16643:24dbef11f477
43 import logging 43 import logging
44 import rfc822 44 import rfc822
45 import select 45 import select
46 import socket 46 import socket
47 47
48 import _readers
48 import socketutil 49 import socketutil
49 50
50 logger = logging.getLogger(__name__) 51 logger = logging.getLogger(__name__)
51 52
52 __all__ = ['HTTPConnection', 'HTTPResponse'] 53 __all__ = ['HTTPConnection', 'HTTPResponse']
53 54
54 HTTP_VER_1_0 = 'HTTP/1.0' 55 HTTP_VER_1_0 = 'HTTP/1.0'
55 HTTP_VER_1_1 = 'HTTP/1.1' 56 HTTP_VER_1_1 = 'HTTP/1.1'
56
57 _LEN_CLOSE_IS_END = -1
58 57
59 OUTGOING_BUFFER_SIZE = 1 << 15 58 OUTGOING_BUFFER_SIZE = 1 << 15
60 INCOMING_BUFFER_SIZE = 1 << 20 59 INCOMING_BUFFER_SIZE = 1 << 20
61 60
62 HDR_ACCEPT_ENCODING = 'accept-encoding' 61 HDR_ACCEPT_ENCODING = 'accept-encoding'
81 """Response from an HTTP server. 80 """Response from an HTTP server.
82 81
83 The response will continue to load as available. If you need the 82 The response will continue to load as available. If you need the
84 complete response before continuing, check the .complete() method. 83 complete response before continuing, check the .complete() method.
85 """ 84 """
86 def __init__(self, sock, timeout): 85 def __init__(self, sock, timeout, method):
87 self.sock = sock 86 self.sock = sock
87 self.method = method
88 self.raw_response = '' 88 self.raw_response = ''
89 self._body = None
90 self._headers_len = 0 89 self._headers_len = 0
91 self._content_len = 0
92 self.headers = None 90 self.headers = None
93 self.will_close = False 91 self.will_close = False
94 self.status_line = '' 92 self.status_line = ''
95 self.status = None 93 self.status = None
94 self.continued = False
96 self.http_version = None 95 self.http_version = None
97 self.reason = None 96 self.reason = None
98 self._chunked = False 97 self._reader = None
99 self._chunked_done = False
100 self._chunked_until_next = 0
101 self._chunked_skip_bytes = 0
102 self._chunked_preloaded_block = None
103 98
104 self._read_location = 0 99 self._read_location = 0
105 self._eol = EOL 100 self._eol = EOL
106 101
107 self._timeout = timeout 102 self._timeout = timeout
115 110
116 Note that if this is a connection where complete means the 111 Note that if this is a connection where complete means the
117 socket is closed, this will nearly always return False, even 112 socket is closed, this will nearly always return False, even
118 in cases where all the data has actually been loaded. 113 in cases where all the data has actually been loaded.
119 """ 114 """
120 if self._chunked: 115 if self._reader:
121 return self._chunked_done 116 return self._reader.done()
122 if self._content_len == _LEN_CLOSE_IS_END: 117
123 return False 118 def _close(self):
124 return self._body is not None and len(self._body) >= self._content_len 119 if self._reader is not None:
120 self._reader._close()
125 121
126 def readline(self): 122 def readline(self):
127 """Read a single line from the response body. 123 """Read a single line from the response body.
128 124
129 This may block until either a line ending is found or the 125 This may block until either a line ending is found or the
130 response is complete. 126 response is complete.
131 """ 127 """
132 eol = self._body.find('\n', self._read_location) 128 # TODO: move this into the reader interface where it can be
133 while eol == -1 and not self.complete(): 129 # smarter (and probably avoid copies)
130 bytes = []
131 while not bytes:
132 try:
133 bytes = [self._reader.read(1)]
134 except _readers.ReadNotReady:
135 self._select()
136 while bytes[-1] != '\n' and not self.complete():
134 self._select() 137 self._select()
135 eol = self._body.find('\n', self._read_location) 138 bytes.append(self._reader.read(1))
136 if eol != -1: 139 if bytes[-1] != '\n':
137 eol += 1 140 next = self._reader.read(1)
138 else: 141 while next and next != '\n':
139 eol = len(self._body) 142 bytes.append(next)
140 data = self._body[self._read_location:eol] 143 next = self._reader.read(1)
141 self._read_location = eol 144 bytes.append(next)
142 return data 145 return ''.join(bytes)
143 146
144 def read(self, length=None): 147 def read(self, length=None):
145 # if length is None, unbounded read 148 # if length is None, unbounded read
146 while (not self.complete() # never select on a finished read 149 while (not self.complete() # never select on a finished read
147 and (not length # unbounded, so we wait for complete() 150 and (not length # unbounded, so we wait for complete()
148 or (self._read_location + length) > len(self._body))): 151 or length > self._reader.available_data)):
149 self._select() 152 self._select()
150 if not length: 153 if not length:
151 length = len(self._body) - self._read_location 154 length = self._reader.available_data
152 elif len(self._body) < (self._read_location + length): 155 r = self._reader.read(length)
153 length = len(self._body) - self._read_location
154 r = self._body[self._read_location:self._read_location + length]
155 self._read_location += len(r)
156 if self.complete() and self.will_close: 156 if self.complete() and self.will_close:
157 self.sock.close() 157 self.sock.close()
158 return r 158 return r
159 159
160 def _select(self): 160 def _select(self):
161 r, _, _ = select.select([self.sock], [], [], self._timeout) 161 r, _, _ = select.select([self.sock], [], [], self._timeout)
162 if not r: 162 if not r:
163 # socket was not readable. If the response is not complete 163 # socket was not readable. If the response is not
164 # and we're not a _LEN_CLOSE_IS_END response, raise a timeout. 164 # complete, raise a timeout.
165 # If we are a _LEN_CLOSE_IS_END response and we have no data, 165 if not self.complete():
166 # raise a timeout.
167 if not (self.complete() or
168 (self._content_len == _LEN_CLOSE_IS_END and self._body)):
169 logger.info('timed out with timeout of %s', self._timeout) 166 logger.info('timed out with timeout of %s', self._timeout)
170 raise HTTPTimeoutException('timeout reading data') 167 raise HTTPTimeoutException('timeout reading data')
171 logger.info('cl: %r body: %r', self._content_len, self._body)
172 try: 168 try:
173 data = self.sock.recv(INCOMING_BUFFER_SIZE) 169 data = self.sock.recv(INCOMING_BUFFER_SIZE)
174 # If the socket was readable and no data was read, that
175 # means the socket was closed. If this isn't a
176 # _CLOSE_IS_END socket, then something is wrong if we're
177 # here (we shouldn't enter _select() if the response is
178 # complete), so abort.
179 if not data and self._content_len != _LEN_CLOSE_IS_END:
180 raise HTTPRemoteClosedError(
181 'server appears to have closed the socket mid-response')
182 except socket.sslerror, e: 170 except socket.sslerror, e:
183 if e.args[0] != socket.SSL_ERROR_WANT_READ: 171 if e.args[0] != socket.SSL_ERROR_WANT_READ:
184 raise 172 raise
185 logger.debug('SSL_WANT_READ in _select, should retry later') 173 logger.debug('SSL_WANT_READ in _select, should retry later')
186 return True 174 return True
187 logger.debug('response read %d data during _select', len(data)) 175 logger.debug('response read %d data during _select', len(data))
176 # If the socket was readable and no data was read, that means
177 # the socket was closed. Inform the reader (if any) so it can
178 # raise an exception if this is an invalid situation.
188 if not data: 179 if not data:
189 if self.headers and self._content_len == _LEN_CLOSE_IS_END: 180 if self._reader:
190 self._content_len = len(self._body) 181 self._reader._close()
191 return False 182 return False
192 else: 183 else:
193 self._load_response(data) 184 self._load_response(data)
194 return True 185 return True
195 186
196 def _chunked_parsedata(self, data):
197 if self._chunked_preloaded_block:
198 data = self._chunked_preloaded_block + data
199 self._chunked_preloaded_block = None
200 while data:
201 logger.debug('looping with %d data remaining', len(data))
202 # Slice out anything we should skip
203 if self._chunked_skip_bytes:
204 if len(data) <= self._chunked_skip_bytes:
205 self._chunked_skip_bytes -= len(data)
206 data = ''
207 break
208 else:
209 data = data[self._chunked_skip_bytes:]
210 self._chunked_skip_bytes = 0
211
212 # determine how much is until the next chunk
213 if self._chunked_until_next:
214 amt = self._chunked_until_next
215 logger.debug('reading remaining %d of existing chunk', amt)
216 self._chunked_until_next = 0
217 body = data
218 else:
219 try:
220 amt, body = data.split(self._eol, 1)
221 except ValueError:
222 self._chunked_preloaded_block = data
223 logger.debug('saving %r as a preloaded block for chunked',
224 self._chunked_preloaded_block)
225 return
226 amt = int(amt, base=16)
227 logger.debug('reading chunk of length %d', amt)
228 if amt == 0:
229 self._chunked_done = True
230
231 # read through end of what we have or the chunk
232 self._body += body[:amt]
233 if len(body) >= amt:
234 data = body[amt:]
235 self._chunked_skip_bytes = len(self._eol)
236 else:
237 self._chunked_until_next = amt - len(body)
238 self._chunked_skip_bytes = 0
239 data = ''
240
241 def _load_response(self, data): 187 def _load_response(self, data):
242 if self._chunked: 188 # Being here implies we're not at the end of the headers yet,
243 self._chunked_parsedata(data) 189 # since at the end of this method if headers were completely
244 return 190 # loaded we replace this method with the load() method of the
245 elif self._body is not None: 191 # reader we created.
246 self._body += data
247 return
248
249 # We haven't seen end of headers yet
250 self.raw_response += data 192 self.raw_response += data
251 # This is a bogus server with bad line endings 193 # This is a bogus server with bad line endings
252 if self._eol not in self.raw_response: 194 if self._eol not in self.raw_response:
253 for bad_eol in ('\n', '\r'): 195 for bad_eol in ('\n', '\r'):
254 if (bad_eol in self.raw_response 196 if (bad_eol in self.raw_response
268 # handle 100-continue response 210 # handle 100-continue response
269 hdrs, body = self.raw_response.split(self._end_headers, 1) 211 hdrs, body = self.raw_response.split(self._end_headers, 1)
270 http_ver, status = hdrs.split(' ', 1) 212 http_ver, status = hdrs.split(' ', 1)
271 if status.startswith('100'): 213 if status.startswith('100'):
272 self.raw_response = body 214 self.raw_response = body
215 self.continued = True
273 logger.debug('continue seen, setting body to %r', body) 216 logger.debug('continue seen, setting body to %r', body)
274 return 217 return
275 218
276 # arriving here means we should parse response headers 219 # arriving here means we should parse response headers
277 # as all headers have arrived completely 220 # as all headers have arrived completely
287 self.reason) = self.status_line.split(' ', 2) 230 self.reason) = self.status_line.split(' ', 2)
288 self.status = int(self.status) 231 self.status = int(self.status)
289 if self._eol != EOL: 232 if self._eol != EOL:
290 hdrs = hdrs.replace(self._eol, '\r\n') 233 hdrs = hdrs.replace(self._eol, '\r\n')
291 headers = rfc822.Message(cStringIO.StringIO(hdrs)) 234 headers = rfc822.Message(cStringIO.StringIO(hdrs))
235 content_len = None
292 if HDR_CONTENT_LENGTH in headers: 236 if HDR_CONTENT_LENGTH in headers:
293 self._content_len = int(headers[HDR_CONTENT_LENGTH]) 237 content_len = int(headers[HDR_CONTENT_LENGTH])
294 if self.http_version == HTTP_VER_1_0: 238 if self.http_version == HTTP_VER_1_0:
295 self.will_close = True 239 self.will_close = True
296 elif HDR_CONNECTION_CTRL in headers: 240 elif HDR_CONNECTION_CTRL in headers:
297 self.will_close = ( 241 self.will_close = (
298 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE) 242 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
299 if self._content_len == 0:
300 self._content_len = _LEN_CLOSE_IS_END
301 if (HDR_XFER_ENCODING in headers 243 if (HDR_XFER_ENCODING in headers
302 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED): 244 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
303 self._body = '' 245 self._reader = _readers.ChunkedReader(self._eol)
304 self._chunked_parsedata(body) 246 logger.debug('using a chunked reader')
305 self._chunked = True 247 else:
306 if self._body is None: 248 # HEAD responses are forbidden from returning a body, and
307 self._body = body 249 # it's implausible for a CONNECT response to use
250 # close-is-end logic for an OK response.
251 if (self.method == 'HEAD' or
252 (self.method == 'CONNECT' and content_len is None)):
253 content_len = 0
254 if content_len is not None:
255 logger.debug('using a content-length reader with length %d',
256 content_len)
257 self._reader = _readers.ContentLengthReader(content_len)
258 else:
259 # Response body had no length specified and is not
260 # chunked, so the end of the body will only be
261 # identifiable by the termination of the socket by the
262 # server. My interpretation of the spec means that we
263 # are correct in hitting this case if
264 # transfer-encoding, content-length, and
265 # connection-control were left unspecified.
266 self._reader = _readers.CloseIsEndReader()
267 logger.debug('using a close-is-end reader')
268 self.will_close = True
269
270 if body:
271 self._reader._load(body)
272 logger.debug('headers complete')
308 self.headers = headers 273 self.headers = headers
274 self._load_response = self._reader._load
309 275
310 276
311 class HTTPConnection(object): 277 class HTTPConnection(object):
312 """Connection to a single http server. 278 """Connection to a single http server.
313 279
380 data = self.buildheaders('CONNECT', '%s:%d' % (self.host, 346 data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
381 self.port), 347 self.port),
382 {}, HTTP_VER_1_0) 348 {}, HTTP_VER_1_0)
383 sock.send(data) 349 sock.send(data)
384 sock.setblocking(0) 350 sock.setblocking(0)
385 r = self.response_class(sock, self.timeout) 351 r = self.response_class(sock, self.timeout, 'CONNECT')
386 timeout_exc = HTTPTimeoutException( 352 timeout_exc = HTTPTimeoutException(
387 'Timed out waiting for CONNECT response from proxy') 353 'Timed out waiting for CONNECT response from proxy')
388 while not r.complete(): 354 while not r.complete():
389 try: 355 try:
390 if not r._select(): 356 if not r._select():
391 raise timeout_exc 357 if not r.complete():
358 raise timeout_exc
392 except HTTPTimeoutException: 359 except HTTPTimeoutException:
393 # This raise/except pattern looks goofy, but 360 # This raise/except pattern looks goofy, but
394 # _select can raise the timeout as well as the 361 # _select can raise the timeout as well as the
395 # loop body. I wish it wasn't this convoluted, 362 # loop body. I wish it wasn't this convoluted,
396 # but I don't have a better solution 363 # but I don't have a better solution
525 and not (response and response.complete())): 492 and not (response and response.complete())):
526 select_timeout = self.timeout 493 select_timeout = self.timeout
527 out = outgoing_headers or body 494 out = outgoing_headers or body
528 blocking_on_continue = False 495 blocking_on_continue = False
529 if expect_continue and not outgoing_headers and not ( 496 if expect_continue and not outgoing_headers and not (
530 response and response.headers): 497 response and (response.headers or response.continued)):
531 logger.info( 498 logger.info(
532 'waiting up to %s seconds for' 499 'waiting up to %s seconds for'
533 ' continue response from server', 500 ' continue response from server',
534 self.continue_timeout) 501 self.continue_timeout)
535 select_timeout = self.continue_timeout 502 select_timeout = self.continue_timeout
548 expect_continue = False 515 expect_continue = False
549 logger.info('no response to continue expectation from ' 516 logger.info('no response to continue expectation from '
550 'server, optimistically sending request body') 517 'server, optimistically sending request body')
551 else: 518 else:
552 raise HTTPTimeoutException('timeout sending data') 519 raise HTTPTimeoutException('timeout sending data')
553 # TODO exceptional conditions with select? (what are those be?)
554 # TODO if the response is loading, must we finish sending at all?
555 #
556 # Certainly not if it's going to close the connection and/or
557 # the response is already done...I think.
558 was_first = first 520 was_first = first
559 521
560 # incoming data 522 # incoming data
561 if r: 523 if r:
562 try: 524 try:
570 continue 532 continue
571 if not data: 533 if not data:
572 logger.info('socket appears closed in read') 534 logger.info('socket appears closed in read')
573 self.sock = None 535 self.sock = None
574 self._current_response = None 536 self._current_response = None
537 if response is not None:
538 response._close()
575 # This if/elif ladder is a bit subtle, 539 # This if/elif ladder is a bit subtle,
576 # comments in each branch should help. 540 # comments in each branch should help.
577 if response is not None and ( 541 if response is not None and response.complete():
578 response.complete() or
579 response._content_len == _LEN_CLOSE_IS_END):
580 # Server responded completely and then 542 # Server responded completely and then
581 # closed the socket. We should just shut 543 # closed the socket. We should just shut
582 # things down and let the caller get their 544 # things down and let the caller get their
583 # response. 545 # response.
584 logger.info('Got an early response, ' 546 logger.info('Got an early response, '
603 'Connection appears closed after ' 565 'Connection appears closed after '
604 'some request data was written, but the ' 566 'some request data was written, but the '
605 'response was missing or incomplete!') 567 'response was missing or incomplete!')
606 logger.debug('read %d bytes in request()', len(data)) 568 logger.debug('read %d bytes in request()', len(data))
607 if response is None: 569 if response is None:
608 response = self.response_class(r[0], self.timeout) 570 response = self.response_class(r[0], self.timeout, method)
609 response._load_response(data) 571 response._load_response(data)
610 # Jump to the next select() call so we load more 572 # Jump to the next select() call so we load more
611 # data if the server is still sending us content. 573 # data if the server is still sending us content.
612 continue 574 continue
613 except socket.error, e: 575 except socket.error, e:
614 if e[0] != errno.EPIPE and not was_first: 576 if e[0] != errno.EPIPE and not was_first:
615 raise 577 raise
616 if (response._content_len
617 and response._content_len != _LEN_CLOSE_IS_END):
618 outgoing_headers = sent_data + outgoing_headers
619 reconnect('read')
620 578
621 # outgoing data 579 # outgoing data
622 if w and out: 580 if w and out:
623 try: 581 try:
624 if getattr(out, 'read', False): 582 if getattr(out, 'read', False):
659 outgoing_headers = out[amt:] 617 outgoing_headers = out[amt:]
660 618
661 # close if the server response said to or responded before eating 619 # close if the server response said to or responded before eating
662 # the whole request 620 # the whole request
663 if response is None: 621 if response is None:
664 response = self.response_class(self.sock, self.timeout) 622 response = self.response_class(self.sock, self.timeout, method)
665 complete = response.complete() 623 complete = response.complete()
666 data_left = bool(outgoing_headers or body) 624 data_left = bool(outgoing_headers or body)
667 if data_left: 625 if data_left:
668 logger.info('stopped sending request early, ' 626 logger.info('stopped sending request early, '
669 'will close the socket to be safe.') 627 'will close the socket to be safe.')
677 def getresponse(self): 635 def getresponse(self):
678 if self._current_response is None: 636 if self._current_response is None:
679 raise httplib.ResponseNotReady() 637 raise httplib.ResponseNotReady()
680 r = self._current_response 638 r = self._current_response
681 while r.headers is None: 639 while r.headers is None:
682 r._select() 640 if not r._select() and not r.complete():
641 raise _readers.HTTPRemoteClosedError()
683 if r.will_close: 642 if r.will_close:
684 self.sock = None 643 self.sock = None
685 self._current_response = None 644 self._current_response = None
686 elif r.complete(): 645 elif r.complete():
687 self._current_response = None 646 self._current_response = None
703 662
704 663
705 class HTTPStateError(httplib.HTTPException): 664 class HTTPStateError(httplib.HTTPException):
706 """Invalid internal state encountered.""" 665 """Invalid internal state encountered."""
707 666
708 667 # Forward this exception type from _readers since it needs to be part
709 class HTTPRemoteClosedError(httplib.HTTPException): 668 # of the public API.
710 """The server closed the remote socket in the middle of a response.""" 669 HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
711 # no-check-code 670 # no-check-code