contrib/check-config.py
author Gregory Szorc <gregory.szorc@gmail.com>
Tue, 10 Jan 2017 23:37:08 -0800
changeset 30766 d7bf7d2bd5ab
parent 28352 a92ee4d8a574
child 32865 e5a6a540ae63
permissions -rwxr-xr-x
hgweb: support Content Security Policy Content-Security-Policy (CSP) is a web security feature that allows servers to declare what loaded content is allowed to do. For example, a policy can prevent loading of images, JavaScript, CSS, etc unless the source of that content is whitelisted (by hostname, URI scheme, hashes of content, etc). It's a nifty security feature that provides extra mitigation against some attacks, notably XSS. Mitigation against these attacks is important for Mercurial because hgweb renders repository data, which is commonly untrusted. While we make attempts to escape things, etc, there's the possibility that malicious data could be injected into the site content. If this happens today, the full power of the web browser is available to that malicious content. A restrictive CSP policy (defined by the server operator and sent in an HTTP header which is outside the control of malicious content), could restrict browser capabilities and mitigate security problems posed by malicious data. CSP works by emitting an HTTP header declaring the policy that browsers should apply. Ideally, this header would be emitted by a layer above Mercurial (likely the HTTP server doing the WSGI "proxying"). This works for some CSP policies, but not all. For example, policies to allow inline JavaScript may require setting a "nonce" attribute on <script>. This attribute value must be unique and non-guessable. And, the value must be present in the HTTP header and the HTML body. This means that coordinating the value between Mercurial and another HTTP server could be difficult: it is much easier to generate and emit the nonce in a central location. This commit introduces support for emitting a Content-Security-Policy header from hgweb. A config option defines the header value. If present, the header is emitted. A special "%nonce%" syntax in the value triggers generation of a nonce and inclusion in <script> elements in templates. The inclusion of a nonce does not occur unless "%nonce%" is present. This makes this commit completely backwards compatible and the feature opt-in. The nonce is a type 4 UUID, which is the flavor that is randomly generated. It has 122 random bits, which should be plenty to satisfy the guarantees of a nonce.

#!/usr/bin/env python
#
# check-config - a config flag documentation checker for Mercurial
#
# Copyright 2015 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import, print_function
import re
import sys

foundopts = {}
documented = {}

configre = (r"""ui\.config(|int|bool|list)\(['"](\S+)['"],\s*"""
            r"""['"](\S+)['"](,\s+(?:default=)?(\S+?))?\)""")
configpartialre = (r"""ui\.config""")

def main(args):
    for f in args:
        sect = ''
        prevname = ''
        confsect = ''
        carryover = ''
        for l in open(f):

            # check topic-like bits
            m = re.match('\s*``(\S+)``', l)
            if m:
                prevname = m.group(1)
            if re.match('^\s*-+$', l):
                sect = prevname
                prevname = ''

            if sect and prevname:
                name = sect + '.' + prevname
                documented[name] = 1

            # check docstring bits
            m = re.match(r'^\s+\[(\S+)\]', l)
            if m:
                confsect = m.group(1)
                continue
            m = re.match(r'^\s+(?:#\s*)?(\S+) = ', l)
            if m:
                name = confsect + '.' + m.group(1)
                documented[name] = 1

            # like the bugzilla extension
            m = re.match(r'^\s*(\S+\.\S+)$', l)
            if m:
                documented[m.group(1)] = 1

            # like convert
            m = re.match(r'^\s*:(\S+\.\S+):\s+', l)
            if m:
                documented[m.group(1)] = 1

            # quoted in help or docstrings
            m = re.match(r'.*?``(\S+\.\S+)``', l)
            if m:
                documented[m.group(1)] = 1

            # look for ignore markers
            m = re.search(r'# (?:internal|experimental|deprecated|developer)'
                          ' config: (\S+\.\S+)$', l)
            if m:
                documented[m.group(1)] = 1

            # look for code-like bits
            line = carryover + l
            m = re.search(configre, line, re.MULTILINE)
            if m:
                ctype = m.group(1)
                if not ctype:
                    ctype = 'str'
                name = m.group(2) + "." + m.group(3)
                default = m.group(5)
                if default in (None, 'False', 'None', '0', '[]', '""', "''"):
                    default = ''
                if re.match('[a-z.]+$', default):
                    default = '<variable>'
                if name in foundopts and (ctype, default) != foundopts[name]:
                    print(l)
                    print("conflict on %s: %r != %r" % (name, (ctype, default),
                                                        foundopts[name]))
                foundopts[name] = (ctype, default)
                carryover = ''
            else:
                m = re.search(configpartialre, line)
                if m:
                    carryover = line
                else:
                    carryover = ''

    for name in sorted(foundopts):
        if name not in documented:
            if not (name.startswith("devel.") or
                    name.startswith("experimental.") or
                    name.startswith("debug.")):
                ctype, default = foundopts[name]
                if default:
                    default = ' [%s]' % default
                print("undocumented: %s (%s)%s" % (name, ctype, default))

if __name__ == "__main__":
    if len(sys.argv) > 1:
        sys.exit(main(sys.argv[1:]))
    else:
        sys.exit(main([l.rstrip() for l in sys.stdin]))