mercurial/revset.py
author Nicolas Dumazet <nicdumz.commits@gmail.com>
Tue, 20 Jul 2010 14:42:05 +0900
changeset 11635 4da35e02b67c
parent 11467 6b836d5c8c9e
child 11650 ebaf117c2642
permissions -rw-r--r--
log: do not redefine cachefunc in walkchangerevs The same variable is defined a few blocks earlier. The first phases in walkchangerevs should in fact fill that cache, and allow faster lookups in the last phase. Redefining and overriding this cached function, (knowing that it will be called with the same arguments) defeats the caching purpose.

# revset.py - revision set queries for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import re
import parser, util, error, discovery
import match as _match
from i18n import _

elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (19, ("negate", 19), ("minus", 19)),
    "::": (17, ("dagrangepre", 17), ("dagrange", 17),
           ("dagrangepost", 17)),
    "..": (17, ("dagrangepre", 17), ("dagrange", 17),
           ("dagrangepost", 17)),
    ":": (15, ("rangepre", 15), ("range", 15), ("rangepost", 15)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

keywords = set(['and', 'or', 'not'])

def tokenize(program):
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c in "():,-|&+!": # handle simple operators
            yield (c, None, pos)
        elif c in '"\'': # handle quoted strings
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', program[s:pos].decode('string-escape'), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in '._' or ord(c) > 127: # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in "._" or ord(d) > 127):
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)

# helpers

def getstring(x, err):
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)

def getlist(x):
    if not x:
        return []
    if x[0] == 'list':
        return getlist(x[1]) + [x[2]]
    return [x]

def getargs(x, min, max, err):
    l = getlist(x)
    if len(l) < min or len(l) > max:
        raise error.ParseError(err)
    return l

def getset(repo, subset, x):
    if not x:
        raise error.ParseError(_("missing argument"))
    return methods[x[0]](repo, subset, *x[1:])

# operator methods

def stringset(repo, subset, x):
    x = repo[x].rev()
    if x == -1 and len(subset) == len(repo):
        return [-1]
    if x in subset:
        return [x]
    return []

def symbolset(repo, subset, x):
    if x in symbols:
        raise error.ParseError(_("can't use %s here") % x)
    return stringset(repo, subset, x)

def rangeset(repo, subset, x, y):
    m = getset(repo, subset, x)
    if not m:
        m = getset(repo, range(len(repo)), x)

    n = getset(repo, subset, y)
    if not n:
        n = getset(repo, range(len(repo)), y)

    if not m or not n:
        return []
    m, n = m[0], n[-1]

    if m < n:
        r = range(m, n + 1)
    else:
        r = range(m, n - 1, -1)
    s = set(subset)
    return [x for x in r if x in s]

def andset(repo, subset, x, y):
    return getset(repo, getset(repo, subset, x), y)

def orset(repo, subset, x, y):
    s = set(getset(repo, subset, x))
    s |= set(getset(repo, [r for r in subset if r not in s], y))
    return [r for r in subset if r in s]

def notset(repo, subset, x):
    s = set(getset(repo, subset, x))
    return [r for r in subset if r not in s]

def listset(repo, subset, a, b):
    raise error.ParseError(_("can't use a list in this context"))

def func(repo, subset, a, b):
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](repo, subset, b)
    raise error.ParseError(_("not a function: %s") % a[1])

# functions

def p1(repo, subset, x):
    ps = set()
    cl = repo.changelog
    for r in getset(repo, subset, x):
        ps.add(cl.parentrevs(r)[0])
    return [r for r in subset if r in ps]

def p2(repo, subset, x):
    ps = set()
    cl = repo.changelog
    for r in getset(repo, subset, x):
        ps.add(cl.parentrevs(r)[1])
    return [r for r in subset if r in ps]

def parents(repo, subset, x):
    ps = set()
    cl = repo.changelog
    for r in getset(repo, subset, x):
        ps.update(cl.parentrevs(r))
    return [r for r in subset if r in ps]

def maxrev(repo, subset, x):
    s = getset(repo, subset, x)
    if s:
        m = max(s)
        if m in subset:
            return [m]
    return []

def limit(repo, subset, x):
    l = getargs(x, 2, 2, _("limit wants two arguments"))
    try:
        lim = int(getstring(l[1], _("limit wants a number")))
    except ValueError:
        raise error.ParseError(_("limit expects a number"))
    return getset(repo, subset, l[0])[:lim]

def children(repo, subset, x):
    cs = set()
    cl = repo.changelog
    s = set(getset(repo, subset, x))
    for r in xrange(0, len(repo)):
        for p in cl.parentrevs(r):
            if p in s:
                cs.add(r)
    return [r for r in subset if r in cs]

def branch(repo, subset, x):
    s = getset(repo, range(len(repo)), x)
    b = set()
    for r in s:
        b.add(repo[r].branch())
    s = set(s)
    return [r for r in subset if r in s or repo[r].branch() in b]

def ancestor(repo, subset, x):
    l = getargs(x, 2, 2, _("ancestor wants two arguments"))
    a = getset(repo, subset, l[0])
    b = getset(repo, subset, l[1])
    if len(a) > 1 or len(b) > 1:
        raise error.ParseError(_("ancestor arguments must be single revisions"))
    return [repo[a[0]].ancestor(repo[b[0]]).rev()]

def ancestors(repo, subset, x):
    args = getset(repo, range(len(repo)), x)
    if not args:
        return []
    s = set(repo.changelog.ancestors(*args)) | set(args)
    return [r for r in subset if r in s]

def descendants(repo, subset, x):
    args = getset(repo, range(len(repo)), x)
    if not args:
        return []
    s = set(repo.changelog.descendants(*args)) | set(args)
    return [r for r in subset if r in s]

def follow(repo, subset, x):
    getargs(x, 0, 0, _("follow takes no arguments"))
    p = repo['.'].rev()
    s = set(repo.changelog.ancestors(p)) | set([p])
    return [r for r in subset if r in s]

def date(repo, subset, x):
    ds = getstring(x, _("date wants a string"))
    dm = util.matchdate(ds)
    return [r for r in subset if dm(repo[r].date()[0])]

def keyword(repo, subset, x):
    kw = getstring(x, _("keyword wants a string")).lower()
    l = []
    for r in subset:
        c = repo[r]
        t = " ".join(c.files() + [c.user(), c.description()])
        if kw in t.lower():
            l.append(r)
    return l

def grep(repo, subset, x):
    gr = re.compile(getstring(x, _("grep wants a string")))
    l = []
    for r in subset:
        c = repo[r]
        for e in c.files() + [c.user(), c.description()]:
            if gr.search(e):
                l.append(r)
                continue
    return l

def author(repo, subset, x):
    n = getstring(x, _("author wants a string")).lower()
    return [r for r in subset if n in repo[r].user().lower()]

def hasfile(repo, subset, x):
    pat = getstring(x, _("file wants a pattern"))
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                continue
    return s

def contains(repo, subset, x):
    pat = getstring(x, _("contains wants a pattern"))
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    if m.files() == [pat]:
        for r in subset:
            if pat in repo[r]:
                s.append(r)
                continue
    else:
        for r in subset:
            for f in repo[r].manifest():
                if m(f):
                    s.append(r)
                    continue
    return s

def checkstatus(repo, subset, pat, field):
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
                continue
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    continue
    return s

def modifies(repo, subset, x):
    pat = getstring(x, _("modifies wants a pattern"))
    return checkstatus(repo, subset, pat, 0)

def adds(repo, subset, x):
    pat = getstring(x, _("adds wants a pattern"))
    return checkstatus(repo, subset, pat, 1)

def removes(repo, subset, x):
    pat = getstring(x, _("removes wants a pattern"))
    return checkstatus(repo, subset, pat, 2)

def merge(repo, subset, x):
    getargs(x, 0, 0, _("merge takes no arguments"))
    cl = repo.changelog
    return [r for r in subset if cl.parentrevs(r)[1] != -1]

def closed(repo, subset, x):
    getargs(x, 0, 0, _("closed takes no arguments"))
    return [r for r in subset if repo[r].extra().get('close')]

def head(repo, subset, x):
    getargs(x, 0, 0, _("head takes no arguments"))
    hs = set()
    for b, ls in repo.branchmap().iteritems():
        hs.update(repo[h].rev() for h in ls)
    return [r for r in subset if r in hs]

def reverse(repo, subset, x):
    l = getset(repo, subset, x)
    l.reverse()
    return l

def sort(repo, subset, x):
    l = getargs(x, 1, 2, _("sort wants one or two arguments"))
    keys = "rev"
    if len(l) == 2:
        keys = getstring(l[1], _("sort spec must be a string"))

    s = l[0]
    keys = keys.split()
    l = []
    def invert(s):
        return "".join(chr(255 - ord(c)) for c in s)
    for r in getset(repo, subset, s):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            elif k in 'user author':
                e.append(c.user())
            elif k in '-user -author':
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                raise error.ParseError(_("unknown sort key %r") % k)
        e.append(r)
        l.append(e)
    l.sort()
    return [e[-1] for e in l]

def getall(repo, subset, x):
    getargs(x, 0, 0, _("all takes no arguments"))
    return subset

def heads(repo, subset, x):
    s = getset(repo, subset, x)
    ps = set(parents(repo, subset, x))
    return [r for r in s if r not in ps]

def roots(repo, subset, x):
    s = getset(repo, subset, x)
    cs = set(children(repo, subset, x))
    return [r for r in s if r not in cs]

def outgoing(repo, subset, x):
    import hg # avoid start-up nasties
    l = getargs(x, 0, 1, _("outgoing wants a repository path"))
    dest = l[1:] or ''
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest, branches = hg.parseurl(dest)
    other = hg.repository(hg.remoteui(repo, {}), dest)
    repo.ui.pushbuffer()
    o = discovery.findoutgoing(repo, other)
    repo.ui.popbuffer()
    cl = repo.changelog
    o = set([cl.rev(r) for r in repo.changelog.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]

def tagged(repo, subset, x):
    getargs(x, 0, 0, _("tagged takes no arguments"))
    cl = repo.changelog
    s = set([cl.rev(n) for t, n in repo.tagslist() if t != 'tip'])
    return [r for r in subset if r in s]

symbols = {
    "adds": adds,
    "all": getall,
    "ancestor": ancestor,
    "ancestors": ancestors,
    "author": author,
    "branch": branch,
    "children": children,
    "closed": closed,
    "contains": contains,
    "date": date,
    "descendants": descendants,
    "file": hasfile,
    "follow": follow,
    "grep": grep,
    "head": head,
    "heads": heads,
    "keyword": keyword,
    "limit": limit,
    "max": maxrev,
    "merge": merge,
    "modifies": modifies,
    "outgoing": outgoing,
    "p1": p1,
    "p2": p2,
    "parents": parents,
    "removes": removes,
    "reverse": reverse,
    "roots": roots,
    "sort": sort,
    "tagged": tagged,
    "user": author,
}

methods = {
    "range": rangeset,
    "string": stringset,
    "symbol": symbolset,
    "and": andset,
    "or": orset,
    "not": notset,
    "list": listset,
    "func": func,
}

def optimize(x, small):
    if x == None:
        return 0, x

    smallbonus = 1
    if small:
        smallbonus = .5

    op = x[0]
    if op == 'minus':
        return optimize(('and', x[1], ('not', x[2])), small)
    elif op == 'dagrange':
        return optimize(('and', ('func', ('symbol', 'descendants'), x[1]),
                         ('func', ('symbol', 'ancestors'), x[2])), small)
    elif op == 'dagrangepre':
        return optimize(('func', ('symbol', 'ancestors'), x[1]), small)
    elif op == 'dagrangepost':
        return optimize(('func', ('symbol', 'descendants'), x[1]), small)
    elif op == 'rangepre':
        return optimize(('range', ('string', '0'), x[1]), small)
    elif op == 'rangepost':
        return optimize(('range', x[1], ('string', 'tip')), small)
    elif op == 'negate':
        return optimize(('string',
                         '-' + getstring(x[1], _("can't negate that"))), small)
    elif op in 'string symbol negate':
        return smallbonus, x # single revisions are small
    elif op == 'and' or op == 'dagrange':
        wa, ta = optimize(x[1], True)
        wb, tb = optimize(x[2], True)
        w = min(wa, wb)
        if wa > wb:
            return w, (op, tb, ta)
        return w, (op, ta, tb)
    elif op == 'or':
        wa, ta = optimize(x[1], False)
        wb, tb = optimize(x[2], False)
        if wb < wa:
            wb, wa = wa, wb
        return max(wa, wb), (op, ta, tb)
    elif op == 'not':
        o = optimize(x[1], not small)
        return o[0], (op, o[1])
    elif op == 'group':
        return optimize(x[1], small)
    elif op in 'range list':
        wa, ta = optimize(x[1], small)
        wb, tb = optimize(x[2], small)
        return wa + wb, (op, ta, tb)
    elif op == 'func':
        f = getstring(x[1], _("not a symbol"))
        wa, ta = optimize(x[2], small)
        if f in "grep date user author keyword branch file":
            w = 10 # slow
        elif f in "modifies adds removes outgoing":
            w = 30 # slower
        elif f == "contains":
            w = 100 # very slow
        elif f == "ancestor":
            w = 1 * smallbonus
        elif f == "reverse limit":
            w = 0
        elif f in "sort":
            w = 10 # assume most sorts look at changelog
        else:
            w = 1
        return w + wa, (op, x[1], ta)
    return 1, x

parse = parser.parser(tokenize, elements).parse

def match(spec):
    if not spec:
        raise error.ParseError(_("empty query"))
    tree = parse(spec)
    weight, tree = optimize(tree, True)
    def mfunc(repo, subset):
        return getset(repo, subset, tree)
    return mfunc