doc/docchecker
author Pierre-Yves David <pierre-yves.david@octobus.net>
Thu, 28 Feb 2019 00:56:27 +0100
changeset 41886 e514799e4e07
parent 41016 9bfbb9fc5871
child 43731 47ef023d0165
permissions -rwxr-xr-x
discovery: use a lower level but faster way to retrieve parents. We already know that none of the revisions in the undecided set are filtered, so we can skip multiple checks and directly access lower level data. In a private pathological case, this improves the timing from about 70 seconds to about 50 seconds. There are other actions to be taken to improve that case; however, this gives an idea of the general overhead.

#!/usr/bin/env python
#
# docchecker - look for problematic markup
#
# Copyright 2016 timeless <timeless@mozdev.org> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import, print_function

import os
import re
import sys

# msvcrt is only importable on Windows; there, put stdout and stderr into
# binary mode (os.O_BINARY) -- presumably so the bytes written below are not
# newline-translated. On other platforms the ImportError is simply ignored.
try:
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
except ImportError:
    pass

# Byte-oriented output stream used by check(): the underlying binary buffer
# when sys.stdout exposes one, otherwise sys.stdout itself.
stdout = getattr(sys.stdout, 'buffer', sys.stdout)

# Splits a line into (leading whitespace, remainder starting at the first
# non-whitespace character). Lines with no non-whitespace character do not
# match at all, which work() treats as a paragraph break.
leadingline = re.compile(br'(^\s*)(\S.*)$')

# Table of (pattern, warning) pairs applied by check() with re.search().
# Both members are bytes because the input files are read in binary mode.
checks = [
  # A single quote anywhere between the backticks of an :hg:`...` role.
  (br""":hg:`[^`]*'[^`]*`""",
   b"""warning: please avoid nesting ' in :hg:`...`"""),
  # A word character immediately before ":hg:`" (no separating space).
  (br'\w:hg:`',
   b'warning: please have a space before :hg:'),
  # "hg " preceded by two characters (the first not a lowercase letter, the
  # second neither ' nor .), followed by two single quotes that are not
  # directly followed by "hg", with no , ; " or ` before each quote.
  (br"""(?:[^a-z][^'.])hg ([^,;"`]*'(?!hg)){2}""",
   b'''warning: please use " instead of ' for hg ... "..."'''),
]

def check(line):
    """Report every rule in ``checks`` whose pattern is found in *line*.

    *line* is a bytes object.  When at least one pattern matches, the line
    itself is written to ``stdout`` followed by one warning per matching
    rule, each terminated by a newline.  Nothing is written otherwise.
    """
    hits = [warning for pattern, warning in checks
            if re.search(pattern, line)]
    if not hits:
        return
    # The offending line comes first, then its warnings in table order.
    for text in [line] + hits:
        stdout.write(b'%s\n' % text)

def work(file):
    """Unwrap the lines of *file* into paragraphs and check each one.

    *file* is an iterable of bytes lines.  Consecutive lines that share the
    same leading whitespace are joined with a single space; a change of
    indentation or a line without any non-whitespace character ends the
    current paragraph, which is then handed to ``check``.
    """
    prev_indent = b''
    pending = b''

    for raw in file:
        found = leadingline.match(raw)
        if found is None:
            # Blank line: flush what has been gathered and start over.
            check(pending)
            prev_indent = b''
            pending = b''
            continue

        indent, text = found.groups()
        if indent != prev_indent:
            # Indentation changed: the previous paragraph is complete.
            check(pending)
            prev_indent, pending = indent, text
        elif pending:
            pending = pending + b' ' + text
        else:
            pending = text

    # Flush whatever is left once the input is exhausted.
    check(pending)

def main():
    """Run the markup checks on every file named on the command line.

    Each path in ``sys.argv[1:]`` is opened in binary mode and passed to
    ``work``.  A file that cannot be processed is reported on standard
    output and processing continues with the next path.
    """
    for f in sys.argv[1:]:
        try:
            with open(f, 'rb') as file:
                work(file)
        except Exception as e:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still terminate the run instead of being reported
            # as a per-file failure.
            # The literal is deliberately not a raw string: "\n" must be a
            # real newline, not a backslash followed by "n".
            sys.stdout.write("failed to process %s: %s\n" % (f, e))
            # Warnings are written through the binary stream; flushing the
            # text layer here keeps this report in order relative to them.
            sys.stdout.flush()

if __name__ == '__main__':
    main()