comparison mercurial/hgweb/request.py @ 43076:2372284d9457

formatting: blacken the codebase

This is using my patch to black (https://github.com/psf/black/pull/826) so we don't un-wrap collection literals.

Done with:

  hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S

# skip-blame mass-reformatting only

# no-check-commit reformats foo_bar functions

Differential Revision: https://phab.mercurial-scm.org/D6971
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:45:02 -0400
parents 6107d4549fcc
children 687b865b95ad
comparison
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
6 # This software may be used and distributed according to the terms of the 6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version. 7 # GNU General Public License version 2 or any later version.
8 8
9 from __future__ import absolute_import 9 from __future__ import absolute_import
10 10
11 #import wsgiref.validate 11 # import wsgiref.validate
12 12
13 from ..thirdparty import ( 13 from ..thirdparty import attr
14 attr,
15 )
16 from .. import ( 14 from .. import (
17 error, 15 error,
18 pycompat, 16 pycompat,
19 util, 17 util,
20 ) 18 )
21 19
20
22 class multidict(object): 21 class multidict(object):
23 """A dict like object that can store multiple values for a key. 22 """A dict like object that can store multiple values for a key.
24 23
25 Used to store parsed request parameters. 24 Used to store parsed request parameters.
26 25
27 This is inspired by WebOb's class of the same name. 26 This is inspired by WebOb's class of the same name.
28 """ 27 """
28
29 def __init__(self): 29 def __init__(self):
30 self._items = {} 30 self._items = {}
31 31
32 def __getitem__(self, key): 32 def __getitem__(self, key):
33 """Returns the last set value for a key.""" 33 """Returns the last set value for a key."""
73 73
74 return vals[0] 74 return vals[0]
75 75
76 def asdictoflists(self): 76 def asdictoflists(self):
77 return {k: list(v) for k, v in self._items.iteritems()} 77 return {k: list(v) for k, v in self._items.iteritems()}
78
78 79
79 @attr.s(frozen=True) 80 @attr.s(frozen=True)
80 class parsedrequest(object): 81 class parsedrequest(object):
81 """Represents a parsed WSGI request. 82 """Represents a parsed WSGI request.
82 83
122 # Request body input stream. 123 # Request body input stream.
123 bodyfh = attr.ib() 124 bodyfh = attr.ib()
124 # WSGI environment dict, unmodified. 125 # WSGI environment dict, unmodified.
125 rawenv = attr.ib() 126 rawenv = attr.ib()
126 127
128
127 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None): 129 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
128 """Parse URL components from environment variables. 130 """Parse URL components from environment variables.
129 131
130 WSGI defines request attributes via environment variables. This function 132 WSGI defines request attributes via environment variables. This function
131 parses the environment variables into a data structure. 133 parses the environment variables into a data structure.
151 # PEP 3333 defines the WSGI spec and is a useful reference for this code. 153 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
152 154
153 # We first validate that the incoming object conforms with the WSGI spec. 155 # We first validate that the incoming object conforms with the WSGI spec.
154 # We only want to be dealing with spec-conforming WSGI implementations. 156 # We only want to be dealing with spec-conforming WSGI implementations.
155 # TODO enable this once we fix internal violations. 157 # TODO enable this once we fix internal violations.
156 #wsgiref.validate.check_environ(env) 158 # wsgiref.validate.check_environ(env)
157 159
158 # PEP-0333 states that environment keys and values are native strings 160 # PEP-0333 states that environment keys and values are native strings
159 # (bytes on Python 2 and str on Python 3). The code points for the Unicode 161 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
160 # strings on Python 3 must be between \00000-\000FF. We deal with bytes 162 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
161 # in Mercurial, so mass convert string keys and values to bytes. 163 # in Mercurial, so mass convert string keys and values to bytes.
162 if pycompat.ispy3: 164 if pycompat.ispy3:
163 env = {k.encode('latin-1'): v for k, v in env.iteritems()} 165 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
164 env = {k: v.encode('latin-1') if isinstance(v, str) else v 166 env = {
165 for k, v in env.iteritems()} 167 k: v.encode('latin-1') if isinstance(v, str) else v
168 for k, v in env.iteritems()
169 }
166 170
167 # Some hosting solutions are emulating hgwebdir, and dispatching directly 171 # Some hosting solutions are emulating hgwebdir, and dispatching directly
168 # to an hgweb instance using this environment variable. This was always 172 # to an hgweb instance using this environment variable. This was always
169 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them. 173 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
170 if not reponame: 174 if not reponame:
253 257
254 if not env.get('PATH_INFO'): 258 if not env.get('PATH_INFO'):
255 raise error.ProgrammingError('reponame requires PATH_INFO') 259 raise error.ProgrammingError('reponame requires PATH_INFO')
256 260
257 if not env['PATH_INFO'].startswith(repoprefix): 261 if not env['PATH_INFO'].startswith(repoprefix):
258 raise error.ProgrammingError('PATH_INFO does not begin with repo ' 262 raise error.ProgrammingError(
259 'name: %s (%s)' % (env['PATH_INFO'], 263 'PATH_INFO does not begin with repo '
260 reponame)) 264 'name: %s (%s)' % (env['PATH_INFO'], reponame)
261 265 )
262 dispatchpath = env['PATH_INFO'][len(repoprefix):] 266
267 dispatchpath = env['PATH_INFO'][len(repoprefix) :]
263 268
264 if dispatchpath and not dispatchpath.startswith('/'): 269 if dispatchpath and not dispatchpath.startswith('/'):
265 raise error.ProgrammingError('reponame prefix of PATH_INFO does ' 270 raise error.ProgrammingError(
266 'not end at path delimiter: %s (%s)' % 271 'reponame prefix of PATH_INFO does '
267 (env['PATH_INFO'], reponame)) 272 'not end at path delimiter: %s (%s)'
273 % (env['PATH_INFO'], reponame)
274 )
268 275
269 apppath = apppath.rstrip('/') + repoprefix 276 apppath = apppath.rstrip('/') + repoprefix
270 dispatchparts = dispatchpath.strip('/').split('/') 277 dispatchparts = dispatchpath.strip('/').split('/')
271 dispatchpath = '/'.join(dispatchparts) 278 dispatchpath = '/'.join(dispatchparts)
272 279
293 # perform case normalization for us. We just rewrite underscore to dash 300 # perform case normalization for us. We just rewrite underscore to dash
294 # so keys match what likely went over the wire. 301 # so keys match what likely went over the wire.
295 headers = [] 302 headers = []
296 for k, v in env.iteritems(): 303 for k, v in env.iteritems():
297 if k.startswith('HTTP_'): 304 if k.startswith('HTTP_'):
298 headers.append((k[len('HTTP_'):].replace('_', '-'), v)) 305 headers.append((k[len('HTTP_') :].replace('_', '-'), v))
299 306
300 from . import wsgiheaders # avoid cycle 307 from . import wsgiheaders # avoid cycle
308
301 headers = wsgiheaders.Headers(headers) 309 headers = wsgiheaders.Headers(headers)
302 310
303 # This is kind of a lie because the HTTP header wasn't explicitly 311 # This is kind of a lie because the HTTP header wasn't explicitly
304 # sent. But for all intents and purposes it should be OK to lie about 312 # sent. But for all intents and purposes it should be OK to lie about
305 # this, since a consumer will use either value to determine how many 313 # this, since a consumer will use either value to determine how many
311 headers['Content-Type'] = env['CONTENT_TYPE'] 319 headers['Content-Type'] = env['CONTENT_TYPE']
312 320
313 if bodyfh is None: 321 if bodyfh is None:
314 bodyfh = env['wsgi.input'] 322 bodyfh = env['wsgi.input']
315 if 'Content-Length' in headers: 323 if 'Content-Length' in headers:
316 bodyfh = util.cappedreader(bodyfh, 324 bodyfh = util.cappedreader(
317 int(headers['Content-Length'] or '0')) 325 bodyfh, int(headers['Content-Length'] or '0')
318 326 )
319 return parsedrequest(method=env['REQUEST_METHOD'], 327
320 url=fullurl, baseurl=baseurl, 328 return parsedrequest(
321 advertisedurl=advertisedfullurl, 329 method=env['REQUEST_METHOD'],
322 advertisedbaseurl=advertisedbaseurl, 330 url=fullurl,
323 urlscheme=env['wsgi.url_scheme'], 331 baseurl=baseurl,
324 remoteuser=env.get('REMOTE_USER'), 332 advertisedurl=advertisedfullurl,
325 remotehost=env.get('REMOTE_HOST'), 333 advertisedbaseurl=advertisedbaseurl,
326 apppath=apppath, 334 urlscheme=env['wsgi.url_scheme'],
327 dispatchparts=dispatchparts, dispatchpath=dispatchpath, 335 remoteuser=env.get('REMOTE_USER'),
328 reponame=reponame, 336 remotehost=env.get('REMOTE_HOST'),
329 querystring=querystring, 337 apppath=apppath,
330 qsparams=qsparams, 338 dispatchparts=dispatchparts,
331 headers=headers, 339 dispatchpath=dispatchpath,
332 bodyfh=bodyfh, 340 reponame=reponame,
333 rawenv=env) 341 querystring=querystring,
342 qsparams=qsparams,
343 headers=headers,
344 bodyfh=bodyfh,
345 rawenv=env,
346 )
347
334 348
335 class offsettrackingwriter(object): 349 class offsettrackingwriter(object):
336 """A file object like object that is append only and tracks write count. 350 """A file object like object that is append only and tracks write count.
337 351
338 Instances are bound to a callable. This callable is called with data 352 Instances are bound to a callable. This callable is called with data
343 357
344 The intent of this class is to wrap the ``write()`` function returned by 358 The intent of this class is to wrap the ``write()`` function returned by
345 a WSGI ``start_response()`` function. Since ``write()`` is a callable and 359 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
346 not a file object, it doesn't implement other file object methods. 360 not a file object, it doesn't implement other file object methods.
347 """ 361 """
362
348 def __init__(self, writefn): 363 def __init__(self, writefn):
349 self._write = writefn 364 self._write = writefn
350 self._offset = 0 365 self._offset = 0
351 366
352 def write(self, s): 367 def write(self, s):
361 pass 376 pass
362 377
363 def tell(self): 378 def tell(self):
364 return self._offset 379 return self._offset
365 380
381
366 class wsgiresponse(object): 382 class wsgiresponse(object):
367 """Represents a response to a WSGI request. 383 """Represents a response to a WSGI request.
368 384
369 A response consists of a status line, headers, and a body. 385 A response consists of a status line, headers, and a body.
370 386
387 """ 403 """
388 self._req = req 404 self._req = req
389 self._startresponse = startresponse 405 self._startresponse = startresponse
390 406
391 self.status = None 407 self.status = None
392 from . import wsgiheaders # avoid cycle 408 from . import wsgiheaders # avoid cycle
409
393 self.headers = wsgiheaders.Headers([]) 410 self.headers = wsgiheaders.Headers([])
394 411
395 self._bodybytes = None 412 self._bodybytes = None
396 self._bodygen = None 413 self._bodygen = None
397 self._bodywillwrite = False 414 self._bodywillwrite = False
398 self._started = False 415 self._started = False
399 self._bodywritefn = None 416 self._bodywritefn = None
400 417
401 def _verifybody(self): 418 def _verifybody(self):
402 if (self._bodybytes is not None or self._bodygen is not None 419 if (
403 or self._bodywillwrite): 420 self._bodybytes is not None
421 or self._bodygen is not None
422 or self._bodywillwrite
423 ):
404 raise error.ProgrammingError('cannot define body multiple times') 424 raise error.ProgrammingError('cannot define body multiple times')
405 425
406 def setbodybytes(self, b): 426 def setbodybytes(self, b):
407 """Define the response body as static bytes. 427 """Define the response body as static bytes.
408 428
448 self._started = True 468 self._started = True
449 469
450 if not self.status: 470 if not self.status:
451 raise error.ProgrammingError('status line not defined') 471 raise error.ProgrammingError('status line not defined')
452 472
453 if (self._bodybytes is None and self._bodygen is None 473 if (
454 and not self._bodywillwrite): 474 self._bodybytes is None
475 and self._bodygen is None
476 and not self._bodywillwrite
477 ):
455 raise error.ProgrammingError('response body not defined') 478 raise error.ProgrammingError('response body not defined')
456 479
457 # RFC 7232 Section 4.1 states that a 304 MUST generate one of 480 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
458 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary} 481 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
459 # and SHOULD NOT generate other headers unless they could be used 482 # and SHOULD NOT generate other headers unless they could be used
467 if self.headers.get('Content-Length') == '0': 490 if self.headers.get('Content-Length') == '0':
468 del self.headers['Content-Length'] 491 del self.headers['Content-Length']
469 492
470 # Strictly speaking, this is too strict. But until it causes 493 # Strictly speaking, this is too strict. But until it causes
471 # problems, let's be strict. 494 # problems, let's be strict.
472 badheaders = {k for k in self.headers.keys() 495 badheaders = {
473 if k.lower() not in ('date', 'etag', 'expires', 496 k
474 'cache-control', 497 for k in self.headers.keys()
475 'content-location', 498 if k.lower()
476 'content-security-policy', 499 not in (
477 'vary')} 500 'date',
501 'etag',
502 'expires',
503 'cache-control',
504 'content-location',
505 'content-security-policy',
506 'vary',
507 )
508 }
478 if badheaders: 509 if badheaders:
479 raise error.ProgrammingError( 510 raise error.ProgrammingError(
480 'illegal header on 304 response: %s' % 511 'illegal header on 304 response: %s'
481 ', '.join(sorted(badheaders))) 512 % ', '.join(sorted(badheaders))
513 )
482 514
483 if self._bodygen is not None or self._bodywillwrite: 515 if self._bodygen is not None or self._bodywillwrite:
484 raise error.ProgrammingError("must use setbodybytes('') with " 516 raise error.ProgrammingError(
485 "304 responses") 517 "must use setbodybytes('') with " "304 responses"
518 )
486 519
487 # Various HTTP clients (notably httplib) won't read the HTTP response 520 # Various HTTP clients (notably httplib) won't read the HTTP response
488 # until the HTTP request has been sent in full. If servers (us) send a 521 # until the HTTP request has been sent in full. If servers (us) send a
489 # response before the HTTP request has been fully sent, the connection 522 # response before the HTTP request has been fully sent, the connection
490 # may deadlock because neither end is reading. 523 # may deadlock because neither end is reading.
529 while True: 562 while True:
530 chunk = self._req.bodyfh.read(32768) 563 chunk = self._req.bodyfh.read(32768)
531 if not chunk: 564 if not chunk:
532 break 565 break
533 566
534 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for 567 strheaders = [
535 k, v in self.headers.items()] 568 (pycompat.strurl(k), pycompat.strurl(v))
536 write = self._startresponse(pycompat.sysstr(self.status), 569 for k, v in self.headers.items()
537 strheaders) 570 ]
571 write = self._startresponse(pycompat.sysstr(self.status), strheaders)
538 572
539 if self._bodybytes: 573 if self._bodybytes:
540 yield self._bodybytes 574 yield self._bodybytes
541 elif self._bodygen: 575 elif self._bodygen:
542 for chunk in self._bodygen: 576 for chunk in self._bodygen:
564 """ 598 """
565 if not self._bodywillwrite: 599 if not self._bodywillwrite:
566 raise error.ProgrammingError('must call setbodywillwrite() first') 600 raise error.ProgrammingError('must call setbodywillwrite() first')
567 601
568 if not self._started: 602 if not self._started:
569 raise error.ProgrammingError('must call sendresponse() first; did ' 603 raise error.ProgrammingError(
570 'you remember to consume it since it ' 604 'must call sendresponse() first; did '
571 'is a generator?') 605 'you remember to consume it since it '
606 'is a generator?'
607 )
572 608
573 assert self._bodywritefn 609 assert self._bodywritefn
574 return offsettrackingwriter(self._bodywritefn) 610 return offsettrackingwriter(self._bodywritefn)
611
575 612
576 def wsgiapplication(app_maker): 613 def wsgiapplication(app_maker):
577 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir() 614 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
578 can and should now be used as a WSGI application.''' 615 can and should now be used as a WSGI application.'''
579 application = app_maker() 616 application = app_maker()
617
580 def run_wsgi(env, respond): 618 def run_wsgi(env, respond):
581 return application(env, respond) 619 return application(env, respond)
620
582 return run_wsgi 621 return run_wsgi