view doc/gendoc.py @ 28495:70c2f8a98276

changelog: avoid slicing raw data until needed Before, we were slicing the original raw text and storing individual variables with values corresponding to each field. This is avoidable overhead. With this patch, we store the offsets of the fields at construction time and perform the slice when a property is accessed. This appears to show a very marginal performance win on its own and the gains are so small as to not be worth reporting. However, this patch marks the end of our parsing refactor, so it is worth reporting the gains from the entire series: author(mpm) 0.896565 0.795987 89% desc(bug) 0.887169 0.803438 90% date(2015) 0.878797 0.773961 88% extra(rebase_source) 0.865446 0.761603 88% author(mpm) or author(greg) 1.801832 1.576025 87% author(mpm) or desc(bug) 1.812438 1.593335 88% date(2015) or branch(default) 0.968276 0.875270 90% author(mpm) or desc(bug) or date(2015) or extra(rebase_source) 3.656193 3.183104 87% Pretty consistent speed-up across the board for any revset accessing changelog revision data. Not bad! It's also worth noting that PyPy appears to experience a similar to marginally greater speed-up as well! According to statprof, revsets accessing changelog revision data are now clearly dominated by zlib decompression (16-17% of execution time). Surprisingly, it appears the most expensive part of revision parsing are the various text.index() calls to search for newlines! These appear to cumulatively add up to 5+% of execution time. I reckon implementing the parsing in C would make things marginally faster. If accessing larger strings (such as the commit message), encoding.tolocal() is the most expensive procedure outside of decompression.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 06 Mar 2016 15:40:20 -0800
parents 512f883c234c
children ea1fab5293ca
line wrap: on
line source

#!/usr/bin/env python
"""usage: %s DOC ...

where DOC is the name of a document
"""

import os, sys, textwrap

# This script is executed during installs and may not have C extensions
# available. Relax C module requirements.
os.environ['HGMODULEPOLICY'] = 'allow'
# import from the live mercurial repo: ".." goes to the front of sys.path,
# so it is searched before every other entry. The path is relative, hence
# resolved against the current working directory.
sys.path.insert(0, "..")
from mercurial import demandimport; demandimport.enable()
from mercurial import minirst
from mercurial.commands import table, globalopts
from mercurial.i18n import gettext, _
from mercurial.help import helptable, loaddoc
from mercurial import extensions
from mercurial import ui as uimod

def get_desc(docstr):
    """Split a docstring into a short and a long description.

    Returns a ``(shortdesc, desc)`` tuple. ``shortdesc`` is the first
    line of the docstring with surrounding whitespace removed. ``desc``
    is the dedented text that follows the first line, or ``shortdesc``
    itself when the docstring consists of a single line.

    A missing, empty or whitespace-only docstring yields ``("", "")``.
    """
    if not docstr:
        return "", ""
    # sanitize
    docstr = docstr.strip("\n").rstrip()
    if not docstr:
        # Whitespace-only docstring: there is no first line to extract,
        # so splitlines()[0] below would raise IndexError.
        return "", ""
    shortdesc = docstr.splitlines()[0].strip()

    i = docstr.find("\n")
    if i == -1:
        desc = shortdesc
    else:
        # The summary line is normally followed by a blank line, which is
        # skipped as well. Only skip it when it is really there, so that
        # a body starting right on the second line keeps its first
        # character.
        start = i + 1
        if docstr[start:start + 1] == "\n":
            start += 1
        desc = docstr[start:]

    return (shortdesc, textwrap.dedent(desc))

def get_opts(opts):
    """Yield an ``(option string, description)`` pair for every option.

    Each entry of ``opts`` is either a 5-tuple ``(shortopt, longopt,
    default, desc, optlabel)`` or a 4-tuple without the label, in which
    case the translated string "VALUE" is used as label.

    The option string joins the short and long spellings with ", ". A
    value placeholder is appended to the last spelling: ``<LABEL[+]>``
    for list defaults, ``<LABEL>`` for any other default that is neither
    None nor a boolean. A truthy default is appended to the description.
    """
    for entry in opts:
        if len(entry) != 5:
            shortopt, longopt, default, desc = entry
            optlabel = _("VALUE")
        else:
            shortopt, longopt, default, desc, optlabel = entry

        names = []
        if shortopt:
            names.append("-%s" % shortopt)
        if longopt:
            names.append("--%s" % longopt)

        if isinstance(default, list):
            names[-1] += " <%s[+]>" % optlabel
        elif default is not None and not isinstance(default, bool):
            names[-1] += " <%s>" % optlabel

        if '\n' in desc:
            # only remove line breaks and indentation
            pieces = [piece.lstrip() for piece in desc.split('\n')]
            desc = ' '.join(pieces)

        if default:
            desc += _(" (default: %s)") % default

        yield (", ".join(names), desc)

def get_cmd(cmd, cmdtable):
    """Collect the documentation data for one command table entry.

    ``cmd`` is a key of ``cmdtable``: command names separated by "|",
    optionally prefixed with "^". The returned dict has the keys 'cmd'
    (first name), 'aliases' (remaining names), 'desc' (the result of
    get_desc() on the translated docstring of the entry's first item),
    'opts' (list built from get_opts() on the entry's second item) and
    'synopsis'.
    """
    entry = cmdtable[cmd]
    names = cmd.lstrip("^").split("|")
    primary = names[0]

    aliases = cmd.split("|")[1:]
    desc = get_desc(gettext(entry[0].__doc__))
    opts = list(get_opts(entry[1]))

    synopsis = 'hg ' + primary
    if len(entry) > 2:
        usage = entry[2]
        if usage.startswith('hg'):
            # the entry already carries a complete synopsis
            synopsis = usage
        else:
            synopsis += ' ' + usage

    return {
        'cmd': primary,
        'aliases': aliases,
        'desc': desc,
        'opts': opts,
        'synopsis': synopsis.strip(),
    }

def showdoc(ui):
    """Write the full reference document to ``ui``.

    The output consists of, in order: the global options, all commands
    of the main command table, the help topics (except 'config') and
    one subsection per enabled or disabled extension.
    """
    # global options
    ui.write(minirst.section(_("Options")))
    sawmulti = False
    for optstr, desc in get_opts(globalopts):
        ui.write("%s\n    %s\n\n" % (optstr, desc))
        sawmulti = sawmulti or optstr.endswith("[+]>")
    if sawmulti:
        ui.write(_("\n[+] marked option can be specified multiple times\n"))
        ui.write("\n")

    # commands
    ui.write(minirst.section(_("Commands")))
    commandprinter(ui, table, minirst.subsection)

    # help topics
    # The config help topic is included in the hgrc.5 man page.
    helpprinter(ui, helptable, minirst.section, exclude=['config'])

    # extensions: introduction and local table of contents
    ui.write(minirst.section(_("Extensions")))
    ui.write(_("This section contains help for extensions that are "
               "distributed together with Mercurial. Help for other "
               "extensions is available in the help system."))
    ui.write("\n\n"
             ".. contents::\n"
             "   :class: htmlonly\n"
             "   :local:\n"
             "   :depth: 1\n\n")

    for name in sorted(allextensionnames()):
        mod = extensions.load(ui, name, None)
        ui.write(minirst.subsection(name))
        ui.write("%s\n\n" % gettext(mod.__doc__))
        cmdtable = getattr(mod, 'cmdtable', None)
        if not cmdtable:
            # extension without commands: its docstring is all there is
            continue
        ui.write(minirst.subsubsection(_('Commands')))
        commandprinter(ui, cmdtable, minirst.subsubsubsection)

def showtopic(ui, topic):
    """Write the single help topic named ``topic`` to ``ui``.

    The regular help table is extended with a few extra entries, each
    mapping a topic name to the document loaded for it by loaddoc().
    """
    # (topic name, name of the document handed to loaddoc)
    extradocs = [
        ("common", 'common'),
        ("hg.1", 'hg.1'),
        ("hgignore.5", 'hgignore.5'),
        ("hgrc.5", 'hgrc.5'),
        ("hgignore.5.gendoc", 'hgignore'),
        ("hgrc.5.gendoc", 'config'),
    ]
    extrahelptable = [([name], '', loaddoc(docname))
                      for name, docname in extradocs]
    helpprinter(ui, helptable + extrahelptable, None, include=[topic])

def helpprinter(ui, helptable, sectionfunc, include=(), exclude=()):
    """Write the selected entries of ``helptable`` to ``ui``.

    ``helptable`` is an iterable of ``(names, sec, doc)`` entries. For
    each selected entry one ``.. _name:`` line is written per name,
    followed by the section title (only when ``sectionfunc`` is given;
    it is called with ``sec``) and the document text. ``doc`` is either
    the text itself or a callable that is invoked with ``ui`` and
    returns the text.

    ``include`` and ``exclude`` are collections of names matched against
    the first name of each entry. A non-empty ``exclude`` skips matching
    entries; a non-empty ``include`` skips all entries that do not
    match. The defaults are immutable empty tuples, so no mutable object
    is shared between calls.
    """
    for names, sec, doc in helptable:
        if exclude and names[0] in exclude:
            continue
        if include and names[0] not in include:
            continue
        for name in names:
            ui.write(".. _%s:\n" % name)
        ui.write("\n")
        if sectionfunc:
            ui.write(sectionfunc(sec))
        if callable(doc):
            # the document is produced lazily by a loader function
            doc = doc(ui)
        ui.write(doc)
        ui.write("\n")

def commandprinter(ui, cmdtable, sectionfunc):
    """Write the documentation of every command in ``cmdtable`` to ``ui``.

    Commands are written in sorted order of their primary name (the
    first "|"-separated name of the table key, without leading "^").
    Commands whose primary name starts with "debug" are skipped.

    For each command the output holds: the title produced by
    ``sectionfunc``, the short description, the synopsis as a literal
    block, the long description, the options (if any) and the aliases
    (if any).
    """
    # map each primary command name to its full table key
    h = {}
    for c in cmdtable:
        f = c.split("|")[0]
        f = f.lstrip("^")
        h[f] = c

    # sorted() instead of keys() followed by sort(): keys() is only a
    # list on Python 2, sorted() accepts any iterable
    for f in sorted(h):
        if f.startswith("debug"):
            continue
        d = get_cmd(h[f], cmdtable)
        ui.write(sectionfunc(d['cmd']))
        # short description
        ui.write(d['desc'][0])
        # synopsis
        ui.write("::\n\n")
        for line in d['synopsis'].splitlines():
            # some commands (such as rebase) have a multi-line
            # synopsis
            ui.write("   %s\n" % line)
        ui.write('\n')
        # description
        ui.write("%s\n\n" % d['desc'][1])
        # options
        opt_output = d['opts']
        if opt_output:
            # pad the option column to the widest option string
            opts_len = max(len(line[0]) for line in opt_output)
            ui.write(_("Options:\n\n"))
            multioccur = False
            for optstr, desc in opt_output:
                if desc:
                    s = "%-*s  %s" % (opts_len, optstr, desc)
                else:
                    s = optstr
                ui.write("%s\n" % s)
                if optstr.endswith("[+]>"):
                    multioccur = True
            if multioccur:
                ui.write(_("\n[+] marked option can be specified"
                           " multiple times\n"))
            ui.write("\n")
        # aliases
        if d['aliases']:
            ui.write(_("    aliases: %s\n\n") % " ".join(d['aliases']))


def allextensionnames():
    """Return a list with the names of all enabled and disabled extensions.

    The enabled names come first, followed by the disabled ones; the
    result is not sorted.
    """
    # Turn each key collection into a list before concatenating: on
    # Python 3 keys() returns a view, which does not support "+".
    enabled = list(extensions.enabled().keys())
    disabled = list(extensions.disabled().keys())
    return enabled + disabled

if __name__ == "__main__":
    # The document to generate is the first command line argument; the
    # full reference is generated when none is given.
    doc = sys.argv[1] if len(sys.argv) > 1 else 'hg.1.gendoc'

    ui = uimod.ui()
    if doc != 'hg.1.gendoc':
        # any other name is treated as a single help topic
        showtopic(ui, doc)
    else:
        showdoc(ui)