mercurial/minifileset.py
author Yuya Nishihara <yuya@tcha.org>
Sat, 21 Jul 2018 17:19:12 +0900
changeset 38904 899b4c74209c
parent 38869 7e7e2b2ff284
child 38918 e79a69af1593
permissions -rw-r--r--
fileset: combine union of basic patterns into single matcher This appears to improve query performance in a big repository than I thought. Writing less Python in a hot loop, faster computation we gain. $ hg files --cwd mozilla-central --time 'set:a* + b* + c* + d* + e*' (orig) time: real 0.670 secs (user 0.640+0.000 sys 0.030+0.000) (new) time: real 0.210 secs (user 0.180+0.000 sys 0.020+0.000)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     1
# minifileset.py - a simple language to select files
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     2
#
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     3
# Copyright 2017 Facebook, Inc.
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     4
#
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     5
# This software may be used and distributed according to the terms of the
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     6
# GNU General Public License version 2 or any later version.
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     7
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     8
from __future__ import absolute_import
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
     9
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    10
from .i18n import _
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    11
from . import (
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    12
    error,
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    13
    fileset,
38845
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38844
diff changeset
    14
    filesetlang,
37920
9c98cb30f4de minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents: 35800
diff changeset
    15
    pycompat,
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    16
)
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    17
38687
1500cbe22d53 fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents: 37920
diff changeset
    18
def _sizep(x):
1500cbe22d53 fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents: 37920
diff changeset
    19
    # i18n: "size" is a keyword
38845
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38844
diff changeset
    20
    expr = filesetlang.getstring(x, _("size requires an expression"))
38687
1500cbe22d53 fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents: 37920
diff changeset
    21
    return fileset.sizematcher(expr)
1500cbe22d53 fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents: 37920
diff changeset
    22
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    23
def _compile(tree):
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    24
    if not tree:
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    25
        raise error.ParseError(_("missing argument"))
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    26
    op = tree[0]
35741
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35740
diff changeset
    27
    if op in {'symbol', 'string', 'kindpat'}:
38845
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38844
diff changeset
    28
        name = filesetlang.getpattern(tree, {'path'}, _('invalid file pattern'))
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    29
        if name.startswith('**'): # file extension test, ex. "**.tar.gz"
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    30
            ext = name[2:]
37920
9c98cb30f4de minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents: 35800
diff changeset
    31
            for c in pycompat.bytestr(ext):
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    32
                if c in '*{}[]?/\\':
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    33
                    raise error.ParseError(_('reserved character: %s') % c)
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    34
            return lambda n, s: n.endswith(ext)
35740
06a757b9e334 minifileset: unify handling of symbol and string patterns
Yuya Nishihara <yuya@tcha.org>
parents: 35691
diff changeset
    35
        elif name.startswith('path:'): # directory or full path test
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    36
            p = name[5:] # prefix
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    37
            pl = len(p)
37920
9c98cb30f4de minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents: 35800
diff changeset
    38
            f = lambda n, s: n.startswith(p) and (len(n) == pl
9c98cb30f4de minifileset: fix on Python 3
Augie Fackler <augie@google.com>
parents: 35800
diff changeset
    39
                                                  or n[pl:pl + 1] == '/')
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    40
            return f
35800
d5288b966e2f minifileset: note the unsupported file pattern when raising a parse error
Matt Harbison <matt_harbison@yahoo.com>
parents: 35741
diff changeset
    41
        raise error.ParseError(_("unsupported file pattern: %s") % name,
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    42
                               hint=_('paths must be prefixed with "path:"'))
38904
899b4c74209c fileset: combine union of basic patterns into single matcher
Yuya Nishihara <yuya@tcha.org>
parents: 38869
diff changeset
    43
    elif op in {'or', 'patterns'}:
38844
d82c4d42b615 fileset: flatten 'or' nodes to unnest unionmatchers
Yuya Nishihara <yuya@tcha.org>
parents: 38687
diff changeset
    44
        funcs = [_compile(x) for x in tree[1:]]
d82c4d42b615 fileset: flatten 'or' nodes to unnest unionmatchers
Yuya Nishihara <yuya@tcha.org>
parents: 38687
diff changeset
    45
        return lambda n, s: any(f(n, s) for f in funcs)
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    46
    elif op == 'and':
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    47
        func1 = _compile(tree[1])
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    48
        func2 = _compile(tree[2])
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    49
        return lambda n, s: func1(n, s) and func2(n, s)
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    50
    elif op == 'not':
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    51
        return lambda n, s: not _compile(tree[1])(n, s)
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    52
    elif op == 'func':
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    53
        symbols = {
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    54
            'all': lambda n, s: True,
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    55
            'none': lambda n, s: False,
38687
1500cbe22d53 fileset: parse argument of size() by predicate function
Yuya Nishihara <yuya@tcha.org>
parents: 37920
diff changeset
    56
            'size': lambda n, s: _sizep(tree[2])(s),
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    57
        }
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    58
38845
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38844
diff changeset
    59
        name = filesetlang.getsymbol(tree[1])
35691
735f47b41521 fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents: 35616
diff changeset
    60
        if name in symbols:
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    61
            return symbols[name]
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    62
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    63
        raise error.UnknownIdentifier(name, symbols.keys())
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    64
    elif op == 'minus':     # equivalent to 'x and not y'
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    65
        func1 = _compile(tree[1])
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    66
        func2 = _compile(tree[2])
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    67
        return lambda n, s: func1(n, s) and not func2(n, s)
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    68
    elif op == 'list':
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    69
        raise error.ParseError(_("can't use a list in this context"),
38850
4fe8d1f077b8 help: add quotes to a few commands we point to
Martin von Zweigbergk <martinvonz@google.com>
parents: 38845
diff changeset
    70
                               hint=_('see \'hg help "filesets.x or y"\''))
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    71
    raise error.ProgrammingError('illegal tree: %r' % (tree,))
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    72
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    73
def compile(text):
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    74
    """generate a function (path, size) -> bool from filter specification.
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    75
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    76
    "text" could contain the operators defined by the fileset language for
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    77
    common logic operations, and parenthesis for grouping.  The supported path
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    78
    tests are '**.extname' for file extension test, and '"path:dir/subdir"'
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    79
    for prefix test.  The ``size()`` predicate is borrowed from filesets to test
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    80
    file size.  The predicates ``all()`` and ``none()`` are also supported.
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    81
35741
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35740
diff changeset
    82
    '(**.php & size(">10MB")) | **.zip | (path:bin & !path:bin/README)' for
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    83
    example, will catch all php files whose size is greater than 10 MB, all
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    84
    files whose name ends with ".zip", and all files under "bin" in the repo
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    85
    root except for "bin/README".
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    86
    """
38845
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38844
diff changeset
    87
    tree = filesetlang.parse(text)
38866
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38850
diff changeset
    88
    tree = filesetlang.analyze(tree)
38869
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38868
diff changeset
    89
    tree = filesetlang.optimize(tree)
35616
706aa203b396 fileset: add a lightweight file filtering language
Matt Harbison <matt_harbison@yahoo.com>
parents:
diff changeset
    90
    return _compile(tree)