doc/docchecker
author Martin von Zweigbergk <martinvonz@google.com>
Wed, 17 Apr 2019 23:10:29 -0700
changeset 42230 fdbeacb9d456
parent 41006 9bfbb9fc5871
child 43691 47ef023d0165
permissions -rwxr-xr-x
copies: filter out copies from non-existent source later in _chain() _changesetforwardcopies() repeatedly calls _chain(). That is very expensive because _chain() does lookups in the manifest. I hope to split up the function in two parts: 1) simple chaining, not considering end points, and 2) filter out files that don't exist in the end points (and ping-pong copies/renames). This patches gets us closer to that by moving the check for non-existent source later in the function. Now there are no more checks for "src" and "dst" in the first loop; all the filtering of invalid copies is done in the second loop. The code also looks much more consistent now. No measureable impact on `hg debugpathcopies 4.0 4.8`. That shouldn't be surprising since the only case we're doing more checks now is in case of chained copies/renames, which are quire rare in practice. Differential Revision: https://phab.mercurial-scm.org/D6277

#!/usr/bin/env python
#
# docchecker - look for problematic markup
#
# Copyright 2016 timeless <timeless@mozdev.org> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import, print_function

import os
import re
import sys

try:
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
except ImportError:
    pass

stdout = getattr(sys.stdout, 'buffer', sys.stdout)

leadingline = re.compile(br'(^\s*)(\S.*)$')

checks = [
  (br""":hg:`[^`]*'[^`]*`""",
   b"""warning: please avoid nesting ' in :hg:`...`"""),
  (br'\w:hg:`',
   b'warning: please have a space before :hg:'),
  (br"""(?:[^a-z][^'.])hg ([^,;"`]*'(?!hg)){2}""",
   b'''warning: please use " instead of ' for hg ... "..."'''),
]

def check(line):
    messages = []
    for match, msg in checks:
        if re.search(match, line):
            messages.append(msg)
    if messages:
        stdout.write(b'%s\n' % line)
        for msg in messages:
            stdout.write(b'%s\n' % msg)

def work(file):
    (llead, lline) = (b'', b'')

    for line in file:
        # this section unwraps lines
        match = leadingline.match(line)
        if not match:
            check(lline)
            (llead, lline) = (b'', b'')
            continue

        lead, line = match.group(1), match.group(2)
        if (lead == llead):
            if (lline != b''):
                lline += b' ' + line
            else:
                lline = line
        else:
            check(lline)
            (llead, lline) = (lead, line)
    check(lline)

def main():
    for f in sys.argv[1:]:
        try:
            with open(f, 'rb') as file:
                work(file)
        except BaseException as e:
            sys.stdout.write(r"failed to process %s: %s\n" % (f, e))

main()