view mercurial/fancyopts.py @ 42743:8c9a6adec67a

rust-discovery: using the children cache in add_missing The DAG range computation often needs to get back to very old revisions, and turns out to be disproportionately long, given that the end goal is to remove the descendents of the given missing revisons from the undecided set. The fast iteration capabilities available in the Rust case make it possible to avoid the DAG range entirely, at the cost of precomputing the children cache, and to simply iterate on children of the given missing revisions. This is a case where staying on the same side of the interface between the two languages has clear benefits. On discoveries with initial undecided sets small enough to bypass sampling entirely, the total cost of computing the children cache and the subsequent iteration becomes better than the Python + C counterpart, which relies on reachableroots2. For example, on a repo with more than one million revisions with an initial undecided set of 11 elements, we get these figures: Rust version with simple iteration addcommons: 57.287us first undecided computation: 184.278334ms first children cache computation: 131.056us addmissings iteration: 42.766us first addinfo total: 185.24 ms Python + C version first addcommons: 0.29 ms addcommons 0.21 ms first undecided computation 191.35 ms addmissings 45.75 ms first addinfo total: 237.77 ms On discoveries with large undecided sets, the initial price paid makes the first addinfo slower than the Python + C version, but that's more than compensated by the gain in sampling and subsequent iterations. Here's an extreme example with an undecided set of a million revisions: Rust version: first undecided computation: 293.842629ms first children cache computation: 407.911297ms addmissings iteration: 34.312869ms first addinfo total: 776.02 ms taking initial sample query 2: sampling time: 1318.38 ms query 2; still undecided: 1005013, sample size is: 200 addmissings: 143.062us Python + C version: first undecided computation 298.13 ms addmissings 80.13 ms first addinfo total: 399.62 ms taking initial sample query 2: sampling time: 3957.23 ms query 2; still undecided: 1005013, sample size is: 200 addmissings 52.88 ms Differential Revision: https://phab.mercurial-scm.org/D6428
author Georges Racinet <georges.racinet@octobus.net>
date Tue, 16 Apr 2019 01:16:39 +0200
parents 39e5e346eba7
children 2372284d9457
line wrap: on
line source

# fancyopts.py - better command line parsing
#
#  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import abc
import functools

from .i18n import _
from . import (
    error,
    pycompat,
)

# Set of flags to not apply boolean negation logic on
nevernegate = {
    # avoid --no-noninteractive
    'noninteractive',
    # These two flags are special because they cause hg to do one
    # thing and then exit, and so aren't suitable for use in things
    # like aliases anyway.
    'help',
    'version',
}

def _earlyoptarg(arg, shortlist, namelist):
    """Check if the given arg is a valid unabbreviated option

    Returns (flag_str, has_embedded_value?, embedded_value, takes_value?)

    >>> def opt(arg):
    ...     return _earlyoptarg(arg, b'R:q', [b'cwd=', b'debugger'])

    long form:

    >>> opt(b'--cwd')
    ('--cwd', False, '', True)
    >>> opt(b'--cwd=')
    ('--cwd', True, '', True)
    >>> opt(b'--cwd=foo')
    ('--cwd', True, 'foo', True)
    >>> opt(b'--debugger')
    ('--debugger', False, '', False)
    >>> opt(b'--debugger=')  # invalid but parsable
    ('--debugger', True, '', False)

    short form:

    >>> opt(b'-R')
    ('-R', False, '', True)
    >>> opt(b'-Rfoo')
    ('-R', True, 'foo', True)
    >>> opt(b'-q')
    ('-q', False, '', False)
    >>> opt(b'-qfoo')  # invalid but parsable
    ('-q', True, 'foo', False)

    unknown or invalid:

    >>> opt(b'--unknown')
    ('', False, '', False)
    >>> opt(b'-u')
    ('', False, '', False)
    >>> opt(b'-ufoo')
    ('', False, '', False)
    >>> opt(b'--')
    ('', False, '', False)
    >>> opt(b'-')
    ('', False, '', False)
    >>> opt(b'-:')
    ('', False, '', False)
    >>> opt(b'-:foo')
    ('', False, '', False)
    """
    if arg.startswith('--'):
        flag, eq, val = arg.partition('=')
        if flag[2:] in namelist:
            return flag, bool(eq), val, False
        if flag[2:] + '=' in namelist:
            return flag, bool(eq), val, True
    elif arg.startswith('-') and arg != '-' and not arg.startswith('-:'):
        flag, val = arg[:2], arg[2:]
        i = shortlist.find(flag[1:])
        if i >= 0:
            return flag, bool(val), val, shortlist.startswith(':', i + 1)
    return '', False, '', False

def earlygetopt(args, shortlist, namelist, gnu=False, keepsep=False):
    """Parse options like getopt, but ignores unknown options and abbreviated
    forms

    If gnu=False, this stops processing options as soon as a non/unknown-option
    argument is encountered. Otherwise, option and non-option arguments may be
    intermixed, and unknown-option arguments are taken as non-option.

    If keepsep=True, '--' won't be removed from the list of arguments left.
    This is useful for stripping early options from a full command arguments.

    >>> def get(args, gnu=False, keepsep=False):
    ...     return earlygetopt(args, b'R:q', [b'cwd=', b'debugger'],
    ...                        gnu=gnu, keepsep=keepsep)

    default parsing rules for early options:

    >>> get([b'x', b'--cwd', b'foo', b'-Rbar', b'-q', b'y'], gnu=True)
    ([('--cwd', 'foo'), ('-R', 'bar'), ('-q', '')], ['x', 'y'])
    >>> get([b'x', b'--cwd=foo', b'y', b'-R', b'bar', b'--debugger'], gnu=True)
    ([('--cwd', 'foo'), ('-R', 'bar'), ('--debugger', '')], ['x', 'y'])
    >>> get([b'--unknown', b'--cwd=foo', b'--', '--debugger'], gnu=True)
    ([('--cwd', 'foo')], ['--unknown', '--debugger'])

    restricted parsing rules (early options must come first):

    >>> get([b'--cwd', b'foo', b'-Rbar', b'x', b'-q', b'y'], gnu=False)
    ([('--cwd', 'foo'), ('-R', 'bar')], ['x', '-q', 'y'])
    >>> get([b'--cwd=foo', b'x', b'y', b'-R', b'bar', b'--debugger'], gnu=False)
    ([('--cwd', 'foo')], ['x', 'y', '-R', 'bar', '--debugger'])
    >>> get([b'--unknown', b'--cwd=foo', b'--', '--debugger'], gnu=False)
    ([], ['--unknown', '--cwd=foo', '--', '--debugger'])

    stripping early options (without loosing '--'):

    >>> get([b'x', b'-Rbar', b'--', '--debugger'], gnu=True, keepsep=True)[1]
    ['x', '--', '--debugger']

    last argument:

    >>> get([b'--cwd'])
    ([], ['--cwd'])
    >>> get([b'--cwd=foo'])
    ([('--cwd', 'foo')], [])
    >>> get([b'-R'])
    ([], ['-R'])
    >>> get([b'-Rbar'])
    ([('-R', 'bar')], [])
    >>> get([b'-q'])
    ([('-q', '')], [])
    >>> get([b'-q', b'--'])
    ([('-q', '')], [])

    '--' may be a value:

    >>> get([b'-R', b'--', b'x'])
    ([('-R', '--')], ['x'])
    >>> get([b'--cwd', b'--', b'x'])
    ([('--cwd', '--')], ['x'])

    value passed to bool options:

    >>> get([b'--debugger=foo', b'x'])
    ([], ['--debugger=foo', 'x'])
    >>> get([b'-qfoo', b'x'])
    ([], ['-qfoo', 'x'])

    short option isn't separated with '=':

    >>> get([b'-R=bar'])
    ([('-R', '=bar')], [])

    ':' may be in shortlist, but shouldn't be taken as an option letter:

    >>> get([b'-:', b'y'])
    ([], ['-:', 'y'])

    '-' is a valid non-option argument:

    >>> get([b'-', b'y'])
    ([], ['-', 'y'])
    """
    parsedopts = []
    parsedargs = []
    pos = 0
    while pos < len(args):
        arg = args[pos]
        if arg == '--':
            pos += not keepsep
            break
        flag, hasval, val, takeval = _earlyoptarg(arg, shortlist, namelist)
        if not hasval and takeval and pos + 1 >= len(args):
            # missing last argument
            break
        if not flag or hasval and not takeval:
            # non-option argument or -b/--bool=INVALID_VALUE
            if gnu:
                parsedargs.append(arg)
                pos += 1
            else:
                break
        elif hasval == takeval:
            # -b/--bool or -s/--str=VALUE
            parsedopts.append((flag, val))
            pos += 1
        else:
            # -s/--str VALUE
            parsedopts.append((flag, args[pos + 1]))
            pos += 2

    parsedargs.extend(args[pos:])
    return parsedopts, parsedargs

class customopt(object):
    """Manage defaults and mutations for any type of opt."""

    __metaclass__ = abc.ABCMeta

    def __init__(self, defaultvalue):
        self._defaultvalue = defaultvalue

    def _isboolopt(self):
        return False

    def getdefaultvalue(self):
        """Returns the default value for this opt.

        Subclasses should override this to return a new value if the value type
        is mutable."""
        return self._defaultvalue

    @abc.abstractmethod
    def newstate(self, oldstate, newparam, abort):
        """Adds newparam to oldstate and returns the new state.

        On failure, abort can be called with a string error message."""

class _simpleopt(customopt):
    def _isboolopt(self):
        return isinstance(self._defaultvalue, (bool, type(None)))

    def newstate(self, oldstate, newparam, abort):
        return newparam

class _callableopt(customopt):
    def __init__(self, callablefn):
        self.callablefn = callablefn
        super(_callableopt, self).__init__(None)

    def newstate(self, oldstate, newparam, abort):
        return self.callablefn(newparam)

class _listopt(customopt):
    def getdefaultvalue(self):
        return self._defaultvalue[:]

    def newstate(self, oldstate, newparam, abort):
        oldstate.append(newparam)
        return oldstate

class _intopt(customopt):
    def newstate(self, oldstate, newparam, abort):
        try:
            return int(newparam)
        except ValueError:
            abort(_('expected int'))

def _defaultopt(default):
    """Returns a default opt implementation, given a default value."""

    if isinstance(default, customopt):
        return default
    elif callable(default):
        return _callableopt(default)
    elif isinstance(default, list):
        return _listopt(default[:])
    elif type(default) is type(1):
        return _intopt(default)
    else:
        return _simpleopt(default)

def fancyopts(args, options, state, gnu=False, early=False, optaliases=None):
    """
    read args, parse options, and store options in state

    each option is a tuple of:

      short option or ''
      long option
      default value
      description
      option value label(optional)

    option types include:

      boolean or none - option sets variable in state to true
      string - parameter string is stored in state
      list - parameter string is added to a list
      integer - parameter strings is stored as int
      function - call function with parameter
      customopt - subclass of 'customopt'

    optaliases is a mapping from a canonical option name to a list of
    additional long options. This exists for preserving backward compatibility
    of early options. If we want to use it extensively, please consider moving
    the functionality to the options table (e.g separate long options by '|'.)

    non-option args are returned
    """
    if optaliases is None:
        optaliases = {}
    namelist = []
    shortlist = ''
    argmap = {}
    defmap = {}
    negations = {}
    alllong = set(o[1] for o in options)

    for option in options:
        if len(option) == 5:
            short, name, default, comment, dummy = option
        else:
            short, name, default, comment = option
        # convert opts to getopt format
        onames = [name]
        onames.extend(optaliases.get(name, []))
        name = name.replace('-', '_')

        argmap['-' + short] = name
        for n in onames:
            argmap['--' + n] = name
        defmap[name] = _defaultopt(default)

        # copy defaults to state
        state[name] = defmap[name].getdefaultvalue()

        # does it take a parameter?
        if not defmap[name]._isboolopt():
            if short:
                short += ':'
            onames = [n + '=' for n in onames]
        elif name not in nevernegate:
            for n in onames:
                if n.startswith('no-'):
                    insert = n[3:]
                else:
                    insert = 'no-' + n
                # backout (as a practical example) has both --commit and
                # --no-commit options, so we don't want to allow the
                # negations of those flags.
                if insert not in alllong:
                    assert ('--' + n) not in negations
                    negations['--' + insert] = '--' + n
                    namelist.append(insert)
        if short:
            shortlist += short
        if name:
            namelist.extend(onames)

    # parse arguments
    if early:
        parse = functools.partial(earlygetopt, gnu=gnu)
    elif gnu:
        parse = pycompat.gnugetoptb
    else:
        parse = pycompat.getoptb
    opts, args = parse(args, shortlist, namelist)

    # transfer result to state
    for opt, val in opts:
        boolval = True
        negation = negations.get(opt, False)
        if negation:
            opt = negation
            boolval = False
        name = argmap[opt]
        obj = defmap[name]
        if obj._isboolopt():
            state[name] = boolval
        else:
            def abort(s):
                raise error.Abort(_('invalid value %r for option %s, %s')
                                  % (pycompat.maybebytestr(val), opt, s))
            state[name] = defmap[name].newstate(state[name], val, abort)

    # return unparsed args
    return args