view doc/check-seclevel.py @ 28495:70c2f8a98276

changelog: avoid slicing raw data until needed

Before, we were slicing the original raw text and storing individual variables with values corresponding to each field. This is avoidable overhead.

With this patch, we store the offsets of the fields at construction time and perform the slice when a property is accessed.

This appears to show a very marginal performance win on its own and the gains are so small as to not be worth reporting. However, this patch marks the end of our parsing refactor, so it is worth reporting the gains from the entire series:

author(mpm)                                                       0.896565  0.795987  89%
desc(bug)                                                         0.887169  0.803438  90%
date(2015)                                                        0.878797  0.773961  88%
extra(rebase_source)                                              0.865446  0.761603  88%
author(mpm) or author(greg)                                       1.801832  1.576025  87%
author(mpm) or desc(bug)                                          1.812438  1.593335  88%
date(2015) or branch(default)                                     0.968276  0.875270  90%
author(mpm) or desc(bug) or date(2015) or extra(rebase_source)    3.656193  3.183104  87%

Pretty consistent speed-up across the board for any revset accessing changelog revision data. Not bad!

It's also worth noting that PyPy appears to experience a similar or marginally greater speed-up as well!

According to statprof, revsets accessing changelog revision data are now clearly dominated by zlib decompression (16-17% of execution time). Surprisingly, it appears the most expensive part of revision parsing is the various text.index() calls to search for newlines! These appear to cumulatively add up to 5+% of execution time. I reckon implementing the parsing in C would make things marginally faster.

If accessing larger strings (such as the commit message), encoding.tolocal() is the most expensive procedure outside of decompression.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 06 Mar 2016 15:40:20 -0800
parents 1b8c7d59be43
children 98153441c8cc
line wrap: on
line source

#!/usr/bin/env python
#
# checkseclevel - checking section title levels in each online help document

import sys, os
import optparse

# import from the live mercurial repo
os.environ['HGMODULEPOLICY'] = 'py'
sys.path.insert(0, "..")
from mercurial import demandimport; demandimport.enable()
from mercurial.commands import table
from mercurial.help import helptable
from mercurial import extensions
from mercurial import minirst
from mercurial import ui as uimod

# Section underline marks, ordered by section level: level2mark[n] is
# the mark used for a section at level n.
level2mark = ['"', '=', '-', '.', '#']
# Marks that are kept out of the reverse lookup below, so that using
# one of them is treated the same as using an unknown mark.
reservedmarks = ['"']

# Reverse lookup: usable mark -> section level.
mark2level = {}
for level, mark in enumerate(level2mark):
    if mark not in reservedmarks:
        mark2level[mark] = level

# Starting section level for each kind of help document.  Sections in a
# document must use marks whose level is greater than its starting level.
# help topics
initlevel_topic = 0
# core commands
initlevel_cmd = 1
# extension descriptions (module docstrings)
initlevel_ext = 1
# commands provided by extensions
initlevel_ext_cmd = 3

def showavailables(ui, initlevel):
    """Warn which section marks can be used below ``initlevel``.

    The marks of every level greater than ``initlevel`` are listed in
    level order, each one shown as a four-character sample.
    """
    usable = level2mark[initlevel + 1:]
    samples = ', '.join(repr(mark * 4) for mark in usable)
    ui.warn('    available marks and order of them in this help: %s\n'
            % samples)

def checkseclevel(ui, doc, name, initlevel):
    """Check the section levels used in one help document.

    ``doc`` is parsed with minirst and its section blocks are examined
    in order.  A section is reported through ``ui.warn`` when its
    underline mark has no usable level (unknown or reserved mark), when
    that level is not greater than ``initlevel``, or when it is more
    than one level deeper than the last accepted section.  ``name`` is
    only used to label the document in messages.

    Returns the number of sections that were reported.
    """
    ui.note('checking "%s"\n' % name)
    blocks, _pruned = minirst.parse(doc, 0, ['verbose'])
    sections = (b for b in blocks if b['type'] == 'section')

    problems = 0
    # level of the most recently accepted section; rejected sections do
    # not move it
    lastlevel = initlevel
    for section in sections:
        mark = section['underline']
        title = section['lines'][0]
        level = mark2level.get(mark)
        if level is None or level <= initlevel:
            ui.warn('invalid section mark %r for "%s" of %s\n'
                    % (mark * 4, title, name))
            showavailables(ui, initlevel)
            problems += 1
        elif level > lastlevel + 1:
            # going deeper is only allowed one level at a time
            ui.warn('gap of section level at "%s" of %s\n'
                    % (title, name))
            showavailables(ui, initlevel)
            problems += 1
        else:
            ui.note('appropriate section level for "%s %s"\n'
                    % (mark * (level * 2), title))
            lastlevel = level

    return problems

def checkcmdtable(ui, cmdtable, namefmt, initlevel):
    """Check the help text of every command in ``cmdtable``.

    The command name is the first "|"-separated alias of the table key
    with any leading "^" removed; ``namefmt % name`` labels the command
    in messages.  The help text is the docstring of the first item of
    each table entry, and commands without one are skipped with a note.

    Returns the total number of problems found.
    """
    total = 0
    for key, entry in cmdtable.items():
        cmdname = key.split("|")[0].lstrip("^")
        helptext = entry[0].__doc__
        label = namefmt % cmdname
        if helptext:
            total += checkseclevel(ui, helptext, label, initlevel)
        else:
            ui.note('skip checking %s: no help document\n' % label)
    return total

def checkhghelps(ui):
    """Check every help document available from this Mercurial.

    This covers, in order: the help topics in ``helptable``, the core
    command table, and for each enabled or disabled extension (sorted
    by name) its module docstring and, if it has one, its command
    table.

    Returns the total number of problems found.
    """
    errorcnt = 0
    for names, sec, doc in helptable:
        # the help text may be given as a callable taking ui
        if callable(doc):
            doc = doc(ui)
        errorcnt += checkseclevel(ui, doc,
                                  '%s help topic' % names[0],
                                  initlevel_topic)

    errorcnt += checkcmdtable(ui, table, '%s command', initlevel_cmd)

    # list() keeps the concatenation working even where keys() returns
    # a view object instead of a list
    extnames = sorted(list(extensions.enabled().keys()) +
                      list(extensions.disabled().keys()))
    for name in extnames:
        mod = extensions.load(ui, name, None)
        if not mod.__doc__:
            ui.note(('skip checking %s extension: no help document\n') % name)
            continue
        errorcnt += checkseclevel(ui, mod.__doc__,
                                  '%s extension' % name,
                                  initlevel_ext)

        cmdtable = getattr(mod, 'cmdtable', None)
        if cmdtable:
            errorcnt += checkcmdtable(ui, cmdtable,
                                      '%s command of ' + name + ' extension',
                                      initlevel_ext_cmd)
    return errorcnt

def checkfile(ui, filename, initlevel):
    """Check the section levels of the document stored in ``filename``.

    A ``filename`` of '-' reads the document from standard input, which
    is then referred to as "stdin" in messages.

    Returns the number of problems found.
    """
    if filename != '-':
        with open(filename) as fp:
            doc = fp.read()
    else:
        doc = sys.stdin.read()
        filename = 'stdin'

    message = 'checking input from %s with initlevel %d\n' % (filename,
                                                              initlevel)
    ui.note(message)
    label = 'input from %s' % filename
    return checkseclevel(ui, doc, label, initlevel)

def main():
    """Parse the command line, run the requested check and exit.

    With --file, only that file (or standard input for '-') is checked,
    starting from the section level selected by -t/-c/-e/-C/-l (0 if
    none is given).  Without it, all help documents of Mercurial are
    checked.  The process exits with status 1 if any problem was found.
    """
    optparser = optparse.OptionParser("""%prog [options]

This checks all help documents of Mercurial (topics, commands,
extensions and commands of them), if no file is specified by --file
option.
""")
    optparser.add_option("-v", "--verbose",
                         help="enable additional output",
                         action="store_true")
    optparser.add_option("-d", "--debug",
                         help="debug mode",
                         action="store_true")
    optparser.add_option("-f", "--file",
                         help="filename to read in (or '-' for stdin)",
                         action="store", default="")

    # The document-kind switches all store into options.initlevel, using
    # the same starting levels that checkhghelps() applies to each kind.
    optparser.add_option("-t", "--topic",
                         help="parse file as help topic",
                         action="store_const", dest="initlevel",
                         const=initlevel_topic)
    optparser.add_option("-c", "--command",
                         help="parse file as help of core command",
                         action="store_const", dest="initlevel",
                         const=initlevel_cmd)
    optparser.add_option("-e", "--extension",
                         help="parse file as help of extension",
                         action="store_const", dest="initlevel",
                         const=initlevel_ext)
    optparser.add_option("-C", "--extension-command",
                         help="parse file as help of extension command",
                         action="store_const", dest="initlevel",
                         const=initlevel_ext_cmd)

    optparser.add_option("-l", "--initlevel",
                         help="set initial section level manually",
                         action="store", type="int", default=0)

    # positional arguments are not used
    options = optparser.parse_args()[0]

    ui = uimod.ui()
    ui.setconfig('ui', 'verbose', options.verbose, '--verbose')
    ui.setconfig('ui', 'debug', options.debug, '--debug')

    if options.file:
        errorcnt = checkfile(ui, options.file, options.initlevel)
    else:
        errorcnt = checkhghelps(ui)
    if errorcnt:
        sys.exit(1)

# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()