discovery: use a lower level but faster way to retrieve parents
We already know that none of the revisions in the undecided set are filtered,
so we can skip multiple checks and directly access lower-level data.
In a private pathological case, this improves the timing from about 70 seconds
to about 50 seconds. There are other actions to be taken to improve that case,
however this gives an idea of the general overhead.
#!/usr/bin/env python
#
# docchecker - look for problematic markup
#
# Copyright 2016 timeless <timeless@mozdev.org> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function
import os
import re
import sys
# Switch the standard output and error streams to binary mode (os.O_BINARY)
# when the msvcrt module can be imported; where it cannot, there is nothing
# to adjust and the script carries on unchanged.
try:
    import msvcrt
except ImportError:
    pass
else:
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
# Output stream used for the warnings: the 'buffer' attribute of sys.stdout
# when it has one, otherwise sys.stdout itself.  check() writes bytes to it.
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
# Splits a line into its leading whitespace (group 1) and the remainder that
# starts at the first non-whitespace character (group 2).  A line without any
# non-whitespace character does not match at all.
leadingline = re.compile(br'(^\s*)(\S.*)$')
# (pattern, warning) pairs.  check() runs re.search() with every pattern and
# emits the paired warning for each pattern found in the line.
checks = [
# a ' somewhere between the backticks of :hg:`...`
(br""":hg:`[^`]*'[^`]*`""",
b"""warning: please avoid nesting ' in :hg:`...`"""),
# a word character directly in front of :hg:`
(br'\w:hg:`',
b'warning: please have a space before :hg:'),
# "hg " preceded by two characters (the first not in a-z, the second neither
# ' nor .) and followed by two runs that each contain none of , ; " ` and
# end in a ' which is not directly followed by "hg"
(br"""(?:[^a-z][^'.])hg ([^,;"`]*'(?!hg)){2}""",
b'''warning: please use " instead of ' for hg ... "..."'''),
]
def check(line):
    """Report every warning from ``checks`` whose pattern occurs in *line*.

    *line* is a bytes object.  When at least one pattern is found, the line
    itself is written to ``stdout`` followed by each matching warning, every
    item terminated by a newline.  Nothing is written when no pattern is
    found.  Returns None.
    """
    warnings = [text for pattern, text in checks if re.search(pattern, line)]
    if not warnings:
        return

    # Echo the offending line first so the warnings below it have context.
    stdout.write(b'%s\n' % line)
    for text in warnings:
        stdout.write(b'%s\n' % text)
def work(file):
    """Unwrap the lines of *file* and run ``check`` on each unwrapped chunk.

    *file* is iterated line by line and each line is matched against the
    bytes pattern ``leadingline``.  Consecutive lines with the same leading
    whitespace are joined with single spaces into one chunk.  A change of
    leading whitespace, or a line that ``leadingline`` does not match, ends
    the current chunk and hands it to ``check``.  The final chunk is checked
    once the input is exhausted.
    """
    prev_indent = pending = b''

    for raw in file:
        parsed = leadingline.match(raw)
        if parsed is None:
            # No visible content on this line: flush and start over.
            check(pending)
            prev_indent = pending = b''
            continue

        indent, text = parsed.groups()
        if indent != prev_indent:
            # Different indentation starts a new chunk.
            check(pending)
            prev_indent, pending = indent, text
        elif pending:
            pending += b' ' + text
        else:
            pending = text

    check(pending)
def main():
    """Check every file named on the command line.

    Each path in ``sys.argv[1:]`` is opened in binary mode and passed to
    ``work``.  A failure while processing one file is reported on standard
    output as ``failed to process <path>: <error>`` and the remaining files
    are still processed.
    """
    for f in sys.argv[1:]:
        try:
            with open(f, 'rb') as file:
                work(file)
        # Exception rather than BaseException, so that KeyboardInterrupt and
        # SystemExit stop the run instead of being reported as a per-file
        # failure.
        except Exception as e:
            # A regular (non-raw) literal, so "\n" is a real newline and each
            # failure report ends its own output line.
            sys.stdout.write("failed to process %s: %s\n" % (f, e))


# Run the checker only when executed as a script, not when imported.
if __name__ == '__main__':
    main()