hgweb: use parsed request to construct query parameters
The way hgweb routes requests is kind of bonkers. If PATH_INFO is
set, we take the URL path after the repository. Otherwise, we take
the first part of the query string before "&" and the part before
";" in that.
We then kinda/sorta treat this as a path and route based on that.
This commit ports that code to use the parsed request object. This
required a new attribute on the parsed request to indicate whether
there is any PATH_INFO.
The new code still feels a bit convoluted for my liking. But we'll
need to rewrite more of the code before a better solution becomes
apparent. This code feels strictly better since we're no longer
doing low-level WSGI manipulation during routing.
Differential Revision: https://phab.mercurial-scm.org/D2739
--- a/mercurial/hgweb/hgweb_mod.py Thu Mar 08 11:33:33 2018 -0800
+++ b/mercurial/hgweb/hgweb_mod.py Thu Mar 08 15:37:05 2018 -0800
@@ -318,15 +318,10 @@
if h[0] != 'Content-Security-Policy']
wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
- if r'PATH_INFO' in wsgireq.env:
- parts = wsgireq.env[r'PATH_INFO'].strip(r'/').split(r'/')
- repo_parts = wsgireq.env.get(r'REPO_NAME', r'').split(r'/')
- if parts[:len(repo_parts)] == repo_parts:
- parts = parts[len(repo_parts):]
- query = r'/'.join(parts)
+ if req.havepathinfo:
+ query = req.dispatchpath
else:
- query = wsgireq.env[r'QUERY_STRING'].partition(r'&')[0]
- query = query.partition(r';')[0]
+ query = req.querystring.partition('&')[0].partition(';')[0]
# Route it to a wire protocol handler if it looks like a wire protocol
# request.
@@ -344,7 +339,7 @@
# translate user-visible url structure to internal structure
- args = query.split(r'/', 2)
+ args = query.split('/', 2)
if 'cmd' not in wsgireq.form and args and args[0]:
cmd = args.pop(0)
style = cmd.rfind('-')
--- a/mercurial/hgweb/request.py Thu Mar 08 11:33:33 2018 -0800
+++ b/mercurial/hgweb/request.py Thu Mar 08 15:37:05 2018 -0800
@@ -76,6 +76,9 @@
dispatchparts = attr.ib()
# URL path component (no query string) used for dispatch.
dispatchpath = attr.ib()
+ # Whether there is a path component to this request. This can be true
+ # when ``dispatchpath`` is empty due to REPO_NAME muckery.
+ havepathinfo = attr.ib()
# Raw query string (part after "?" in URL).
querystring = attr.ib()
# List of 2-tuples of query string arguments.
@@ -188,6 +191,7 @@
advertisedbaseurl=advertisedbaseurl,
apppath=apppath,
dispatchparts=dispatchparts, dispatchpath=dispatchpath,
+ havepathinfo='PATH_INFO' in env,
querystring=querystring,
querystringlist=querystringlist,
querystringdict=querystringdict)