hgweb: use parsed request to construct query parameters
author Gregory Szorc <gregory.szorc@gmail.com>
Thu, 08 Mar 2018 15:37:05 -0800
changeset 36819 cfb9ef24968c
parent 36818 886fba199022
child 36820 158d4ecc03c8
hgweb: use parsed request to construct query parameters The way hgweb routes requests is kind of bonkers. If PATH_INFO is set, we take the URL path after the repository. Otherwise, we take the first part of the query string before "&" and the part before ";" in that. We then kinda/sorta treat this as a path and route based on that. This commit ports that code to use the parsed request object. This required a new attribute on the parsed request to indicate whether there is any PATH_INFO. The new code still feels a bit convoluted for my liking. But we'll need to rewrite more of the code before a better solution becomes apparent. This code feels strictly better since we're no longer doing low-level WSGI manipulation during routing. Differential Revision: https://phab.mercurial-scm.org/D2739
mercurial/hgweb/hgweb_mod.py
mercurial/hgweb/request.py
--- a/mercurial/hgweb/hgweb_mod.py	Thu Mar 08 11:33:33 2018 -0800
+++ b/mercurial/hgweb/hgweb_mod.py	Thu Mar 08 15:37:05 2018 -0800
@@ -318,15 +318,10 @@
                                if h[0] != 'Content-Security-Policy']
             wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
 
-        if r'PATH_INFO' in wsgireq.env:
-            parts = wsgireq.env[r'PATH_INFO'].strip(r'/').split(r'/')
-            repo_parts = wsgireq.env.get(r'REPO_NAME', r'').split(r'/')
-            if parts[:len(repo_parts)] == repo_parts:
-                parts = parts[len(repo_parts):]
-            query = r'/'.join(parts)
+        if req.havepathinfo:
+            query = req.dispatchpath
         else:
-            query = wsgireq.env[r'QUERY_STRING'].partition(r'&')[0]
-            query = query.partition(r';')[0]
+            query = req.querystring.partition('&')[0].partition(';')[0]
 
         # Route it to a wire protocol handler if it looks like a wire protocol
         # request.
@@ -344,7 +339,7 @@
 
         # translate user-visible url structure to internal structure
 
-        args = query.split(r'/', 2)
+        args = query.split('/', 2)
         if 'cmd' not in wsgireq.form and args and args[0]:
             cmd = args.pop(0)
             style = cmd.rfind('-')
--- a/mercurial/hgweb/request.py	Thu Mar 08 11:33:33 2018 -0800
+++ b/mercurial/hgweb/request.py	Thu Mar 08 15:37:05 2018 -0800
@@ -76,6 +76,9 @@
     dispatchparts = attr.ib()
     # URL path component (no query string) used for dispatch.
     dispatchpath = attr.ib()
+    # Whether there is a path component to this request. This can be true
+    # when ``dispatchpath`` is empty due to REPO_NAME muckery.
+    havepathinfo = attr.ib()
     # Raw query string (part after "?" in URL).
     querystring = attr.ib()
     # List of 2-tuples of query string arguments.
@@ -188,6 +191,7 @@
                          advertisedbaseurl=advertisedbaseurl,
                          apppath=apppath,
                          dispatchparts=dispatchparts, dispatchpath=dispatchpath,
+                         havepathinfo='PATH_INFO' in env,
                          querystring=querystring,
                          querystringlist=querystringlist,
                          querystringdict=querystringdict)