# HG changeset patch # User Gregory Szorc # Date 1520707714 28800 # Node ID 290fc4c3d1e0d6f69adbac09b413b805ed4706f7 # Parent 7066617187c10b5cd17a6e4c26364a933b8bef38 hgweb: use a capped reader for WSGI input stream Per PEP 3333, the input stream from WSGI should respect EOF and prevent reads past the end of the request body. However, not all WSGI servers guarantee this. Notably, our BaseHTTPServer-based built-in HTTP server doesn't. Instead, it exposes the raw socket and you can read() from it all you want, leaving the connection in a bad state by doing so. We have a "cappedreader" utility class that proxies a file object and prevents reading past a limit. This commit converts the WSGI input stream into a capped reader when the input length is advertised via Content-Length headers. "cappedreader" only exposes a read() method. PEP 3333 states that the input stream MUST also support readline(), readlines(hint), and __iter__(). However, since our WSGI application code only calls read() and since we're not manipulating the stream exposed by the WSGI server, we're not violating the spec here. Differential Revision: https://phab.mercurial-scm.org/D2768 diff -r 7066617187c1 -r 290fc4c3d1e0 mercurial/hgweb/request.py --- a/mercurial/hgweb/request.py Sat Mar 10 10:47:30 2018 -0800 +++ b/mercurial/hgweb/request.py Sat Mar 10 10:48:34 2018 -0800 @@ -234,6 +234,14 @@ raise RuntimeError("Unknown and unsupported WSGI version %d.%d" % version) self.inp = wsgienv[r'wsgi.input'] + + if r'HTTP_CONTENT_LENGTH' in wsgienv: + self.inp = util.cappedreader(self.inp, + int(wsgienv[r'HTTP_CONTENT_LENGTH'])) + elif r'CONTENT_LENGTH' in wsgienv: + self.inp = util.cappedreader(self.inp, + int(wsgienv[r'CONTENT_LENGTH'])) + self.err = wsgienv[r'wsgi.errors'] self.threaded = wsgienv[r'wsgi.multithread'] self.multiprocess = wsgienv[r'wsgi.multiprocess']