changeset 36884:ccb70a77f746

hgweb: refactor 304 handling code We had generic code in wsgirequest for handling HTTP 304 responses. We also had a special case for it in the catch all exception handler in the WSGI application. We only ever raise 304 in one place. So, we don't need to treat it specially in the catch all exception handler. But it is useful to validate behavior of 304 responses. We port the code that sends a 304 to use the new response API. We then move the code for screening 304 sanity into the new response API. As part of doing so, we discovered that we would send Content-Length: 0. This is not allowed. So, we fix our response code to not emit that header for empty response bodies. Differential Revision: https://phab.mercurial-scm.org/D2794
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 10 Mar 2018 18:42:00 -0800
parents 02bea04b4c54
children 9675147aec06
files mercurial/hgweb/hgweb_mod.py mercurial/hgweb/request.py
diffstat 2 files changed, 37 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/hgweb/hgweb_mod.py	Sat Mar 10 18:19:27 2018 -0800
+++ b/mercurial/hgweb/hgweb_mod.py	Sat Mar 10 18:42:00 2018 -0800
@@ -15,7 +15,6 @@
     ErrorResponse,
     HTTP_BAD_REQUEST,
     HTTP_NOT_FOUND,
-    HTTP_NOT_MODIFIED,
     HTTP_OK,
     HTTP_SERVER_ERROR,
     cspvalues,
@@ -391,7 +390,10 @@
             if rctx.configbool('web', 'cache') and not rctx.nonce:
                 tag = 'W/"%d"' % self.mtime
                 if req.headers.get('If-None-Match') == tag:
-                    raise ErrorResponse(HTTP_NOT_MODIFIED)
+                    res.status = '304 Not Modified'
+                    # Response body not allowed on 304.
+                    res.setbodybytes('')
+                    return res.sendresponse()
 
                 wsgireq.headers.append((r'ETag', pycompat.sysstr(tag)))
                 res.headers['ETag'] = tag
@@ -426,9 +428,6 @@
             return tmpl('error', error=pycompat.bytestr(inst))
         except ErrorResponse as inst:
             wsgireq.respond(inst, ctype)
-            if inst.code == HTTP_NOT_MODIFIED:
-                # Not allowed to return a body on a 304
-                return ['']
             return tmpl('error', error=pycompat.bytestr(inst))
 
     def check_perm(self, rctx, req, op):
--- a/mercurial/hgweb/request.py	Sat Mar 10 18:19:27 2018 -0800
+++ b/mercurial/hgweb/request.py	Sat Mar 10 18:42:00 2018 -0800
@@ -15,7 +15,6 @@
 
 from .common import (
     ErrorResponse,
-    HTTP_NOT_MODIFIED,
     statusmessage,
 )
 
@@ -361,7 +360,10 @@
             raise error.ProgrammingError('cannot define body multiple times')
 
     def setbodybytes(self, b):
-        """Define the response body as static bytes."""
+        """Define the response body as static bytes.
+
+        The empty string signals that there is no response body.
+        """
         self._verifybody()
         self._bodybytes = b
         self.headers['Content-Length'] = '%d' % len(b)
@@ -408,6 +410,35 @@
             and not self._bodywillwrite):
             raise error.ProgrammingError('response body not defined')
 
+        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
+        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
+        # and SHOULD NOT generate other headers unless they could be used
+        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
+        # states that no response body can be issued. Content-Length can
+        # be sent. But if it is present, it should be the size of the response
+        # that wasn't transferred.
+        if self.status.startswith('304 '):
+            # setbodybytes('') will set C-L to 0. This doesn't conform with the
+            # spec. So remove it.
+            if self.headers.get('Content-Length') == '0':
+                del self.headers['Content-Length']
+
+            # Strictly speaking, this is too strict. But until it causes
+            # problems, let's be strict.
+            badheaders = {k for k in self.headers.keys()
+                          if k.lower() not in ('date', 'etag', 'expires',
+                                               'cache-control',
+                                               'content-location',
+                                               'vary')}
+            if badheaders:
+                raise error.ProgrammingError(
+                    'illegal header on 304 response: %s' %
+                    ', '.join(sorted(badheaders)))
+
+            if self._bodygen is not None or self._bodywillwrite:
+                raise error.ProgrammingError("must use setbodybytes('') with "
+                                             "304 responses")
+
         # Various HTTP clients (notably httplib) won't read the HTTP response
         # until the HTTP request has been sent in full. If servers (us) send a
         # response before the HTTP request has been fully sent, the connection
@@ -539,13 +570,6 @@
 
             if isinstance(status, ErrorResponse):
                 self.headers.extend(status.headers)
-                if status.code == HTTP_NOT_MODIFIED:
-                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
-                    # it MUST NOT include any headers other than these and no
-                    # body
-                    self.headers = [(k, v) for (k, v) in self.headers if
-                                    k in ('Date', 'ETag', 'Expires',
-                                          'Cache-Control', 'Vary')]
                 status = statusmessage(status.code, pycompat.bytestr(status))
             elif status == 200:
                 status = '200 Script output follows'