Mercurial > hg
view mercurial/fileset.py @ 38643:967fee55e8d9
revlog: postprocess chunk to slice them down to a certain size
After the density slicing is done, we enforce a maximum chunk size to avoid
memory consumption issue.
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Tue, 10 Jul 2018 11:57:33 +0200 |
parents | 5cbcbe51d38d |
children | 07b551a4df44 |
line wrap: on
line source
# fileset.py - file set queries for mercurial # # Copyright 2010 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import re from .i18n import _ from . import ( error, match as matchmod, merge, parser, pycompat, registrar, scmutil, util, ) from .utils import ( stringutil, ) elements = { # token-type: binding-strength, primary, prefix, infix, suffix "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None), ":": (15, None, None, ("kindpat", 15), None), "-": (5, None, ("negate", 19), ("minus", 5), None), "not": (10, None, ("not", 10), None, None), "!": (10, None, ("not", 10), None, None), "and": (5, None, None, ("and", 5), None), "&": (5, None, None, ("and", 5), None), "or": (4, None, None, ("or", 4), None), "|": (4, None, None, ("or", 4), None), "+": (4, None, None, ("or", 4), None), ",": (2, None, None, ("list", 2), None), ")": (0, None, None, None, None), "symbol": (0, "symbol", None, None, None), "string": (0, "string", None, None, None), "end": (0, None, None, None, None), } keywords = {'and', 'or', 'not'} globchars = ".*{}[]?/\\_" def tokenize(program): pos, l = 0, len(program) program = pycompat.bytestr(program) while pos < l: c = program[pos] if c.isspace(): # skip inter-token whitespace pass elif c in "(),-:|&+!": # handle simple operators yield (c, None, pos) elif (c in '"\'' or c == 'r' and program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings if c == 'r': pos += 1 c = program[pos] decode = lambda x: x else: decode = parser.unescapestr pos += 1 s = pos while pos < l: # find closing quote d = program[pos] if d == '\\': # skip over escaped characters pos += 2 continue if d == c: yield ('string', decode(program[s:pos]), s) break pos += 1 else: raise error.ParseError(_("unterminated string"), s) elif c.isalnum() or c in globchars or ord(c) > 127: # gather up a symbol/keyword s = pos pos += 1 while pos < l: # find end of symbol d = program[pos] if not (d.isalnum() or d in globchars or ord(d) > 127): break pos += 1 sym = program[s:pos] if sym in keywords: # operator keywords yield (sym, None, s) else: yield ('symbol', sym, s) pos -= 1 else: raise error.ParseError(_("syntax error"), pos) pos += 1 yield ('end', None, pos) def parse(expr): p = parser.parser(elements) tree, pos = p.parse(tokenize(expr)) if pos != len(expr): raise error.ParseError(_("invalid token"), pos) return tree def getsymbol(x): if x and x[0] == 'symbol': return x[1] raise error.ParseError(_('not a symbol')) def getstring(x, err): if x and (x[0] == 'string' or x[0] == 'symbol'): return x[1] raise error.ParseError(err) def _getkindpat(x, y, allkinds, err): kind = getsymbol(x) pat = getstring(y, err) if kind not in allkinds: raise error.ParseError(_("invalid pattern kind: %s") % kind) return '%s:%s' % (kind, pat) def getpattern(x, allkinds, err): if x and x[0] == 'kindpat': return _getkindpat(x[1], x[2], allkinds, err) return getstring(x, err) def getlist(x): if not x: return [] if x[0] == 'list': return getlist(x[1]) + [x[2]] return [x] def getargs(x, min, max, err): l = getlist(x) if len(l) < min or len(l) > max: raise error.ParseError(err) return l def getset(mctx, x): if not x: raise error.ParseError(_("missing argument")) return methods[x[0]](mctx, *x[1:]) def stringset(mctx, x): m = mctx.matcher([x]) return [f for f in mctx.subset if m(f)] def kindpatset(mctx, x, y): return stringset(mctx, _getkindpat(x, y, matchmod.allpatternkinds, _("pattern must be a string"))) def andset(mctx, x, y): return getset(mctx.narrow(getset(mctx, x)), y) def orset(mctx, x, y): # needs optimizing xl = getset(mctx, x) yl = getset(mctx, y) return xl + [f for f in yl if f not in xl] def notset(mctx, x): s = set(getset(mctx, x)) return [r for r in mctx.subset if r not in s] def minusset(mctx, x, y): xl = getset(mctx, x) yl = set(getset(mctx, y)) return [f for f in xl if f not in yl] def negateset(mctx, x): raise error.ParseError(_("can't use negate operator in this context")) def listset(mctx, a, b): raise error.ParseError(_("can't use a list in this context"), hint=_('see hg help "filesets.x or y"')) def func(mctx, a, b): funcname = getsymbol(a) if funcname in symbols: enabled = mctx._existingenabled mctx._existingenabled = funcname in _existingcallers try: return symbols[funcname](mctx, b) finally: mctx._existingenabled = enabled keep = lambda fn: getattr(fn, '__doc__', None) is not None syms = [s for (s, fn) in symbols.items() if keep(fn)] raise error.UnknownIdentifier(funcname, syms) # symbols are callable like: # fun(mctx, x) # with: # mctx - current matchctx instance # x - argument in tree form symbols = {} # filesets using matchctx.status() _statuscallers = set() # filesets using matchctx.existing() _existingcallers = set() predicate = registrar.filesetpredicate() @predicate('modified()', callstatus=True) def modified(mctx, x): """File that is modified according to :hg:`status`. """ # i18n: "modified" is a keyword getargs(x, 0, 0, _("modified takes no arguments")) s = set(mctx.status().modified) return [f for f in mctx.subset if f in s] @predicate('added()', callstatus=True) def added(mctx, x): """File that is added according to :hg:`status`. """ # i18n: "added" is a keyword getargs(x, 0, 0, _("added takes no arguments")) s = set(mctx.status().added) return [f for f in mctx.subset if f in s] @predicate('removed()', callstatus=True) def removed(mctx, x): """File that is removed according to :hg:`status`. """ # i18n: "removed" is a keyword getargs(x, 0, 0, _("removed takes no arguments")) s = set(mctx.status().removed) return [f for f in mctx.subset if f in s] @predicate('deleted()', callstatus=True) def deleted(mctx, x): """Alias for ``missing()``. """ # i18n: "deleted" is a keyword getargs(x, 0, 0, _("deleted takes no arguments")) s = set(mctx.status().deleted) return [f for f in mctx.subset if f in s] @predicate('missing()', callstatus=True) def missing(mctx, x): """File that is missing according to :hg:`status`. """ # i18n: "missing" is a keyword getargs(x, 0, 0, _("missing takes no arguments")) s = set(mctx.status().deleted) return [f for f in mctx.subset if f in s] @predicate('unknown()', callstatus=True) def unknown(mctx, x): """File that is unknown according to :hg:`status`. These files will only be considered if this predicate is used. """ # i18n: "unknown" is a keyword getargs(x, 0, 0, _("unknown takes no arguments")) s = set(mctx.status().unknown) return [f for f in mctx.subset if f in s] @predicate('ignored()', callstatus=True) def ignored(mctx, x): """File that is ignored according to :hg:`status`. These files will only be considered if this predicate is used. """ # i18n: "ignored" is a keyword getargs(x, 0, 0, _("ignored takes no arguments")) s = set(mctx.status().ignored) return [f for f in mctx.subset if f in s] @predicate('clean()', callstatus=True) def clean(mctx, x): """File that is clean according to :hg:`status`. """ # i18n: "clean" is a keyword getargs(x, 0, 0, _("clean takes no arguments")) s = set(mctx.status().clean) return [f for f in mctx.subset if f in s] @predicate('binary()', callexisting=True) def binary(mctx, x): """File that appears to be binary (contains NUL bytes). """ # i18n: "binary" is a keyword getargs(x, 0, 0, _("binary takes no arguments")) return [f for f in mctx.existing() if mctx.ctx[f].isbinary()] @predicate('exec()', callexisting=True) def exec_(mctx, x): """File that is marked as executable. """ # i18n: "exec" is a keyword getargs(x, 0, 0, _("exec takes no arguments")) return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'x'] @predicate('symlink()', callexisting=True) def symlink(mctx, x): """File that is marked as a symlink. """ # i18n: "symlink" is a keyword getargs(x, 0, 0, _("symlink takes no arguments")) return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'l'] @predicate('resolved()') def resolved(mctx, x): """File that is marked resolved according to :hg:`resolve -l`. """ # i18n: "resolved" is a keyword getargs(x, 0, 0, _("resolved takes no arguments")) if mctx.ctx.rev() is not None: return [] ms = merge.mergestate.read(mctx.ctx.repo()) return [f for f in mctx.subset if f in ms and ms[f] == 'r'] @predicate('unresolved()') def unresolved(mctx, x): """File that is marked unresolved according to :hg:`resolve -l`. """ # i18n: "unresolved" is a keyword getargs(x, 0, 0, _("unresolved takes no arguments")) if mctx.ctx.rev() is not None: return [] ms = merge.mergestate.read(mctx.ctx.repo()) return [f for f in mctx.subset if f in ms and ms[f] == 'u'] @predicate('hgignore()') def hgignore(mctx, x): """File that matches the active .hgignore pattern. """ # i18n: "hgignore" is a keyword getargs(x, 0, 0, _("hgignore takes no arguments")) ignore = mctx.ctx.repo().dirstate._ignore return [f for f in mctx.subset if ignore(f)] @predicate('portable()') def portable(mctx, x): """File that has a portable name. (This doesn't include filenames with case collisions.) """ # i18n: "portable" is a keyword getargs(x, 0, 0, _("portable takes no arguments")) checkwinfilename = util.checkwinfilename return [f for f in mctx.subset if checkwinfilename(f) is None] @predicate('grep(regex)', callexisting=True) def grep(mctx, x): """File contains the given regular expression. """ try: # i18n: "grep" is a keyword r = re.compile(getstring(x, _("grep requires a pattern"))) except re.error as e: raise error.ParseError(_('invalid match pattern: %s') % stringutil.forcebytestr(e)) return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())] def _sizetomax(s): try: s = s.strip().lower() for k, v in util._sizeunits: if s.endswith(k): # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1 n = s[:-len(k)] inc = 1.0 if "." in n: inc /= 10 ** len(n.split(".")[1]) return int((float(n) + inc) * v) - 1 # no extension, this is a precise value return int(s) except ValueError: raise error.ParseError(_("couldn't parse size: %s") % s) def sizematcher(x): """Return a function(size) -> bool from the ``size()`` expression""" # i18n: "size" is a keyword expr = getstring(x, _("size requires an expression")).strip() if '-' in expr: # do we have a range? a, b = expr.split('-', 1) a = util.sizetoint(a) b = util.sizetoint(b) return lambda x: x >= a and x <= b elif expr.startswith("<="): a = util.sizetoint(expr[2:]) return lambda x: x <= a elif expr.startswith("<"): a = util.sizetoint(expr[1:]) return lambda x: x < a elif expr.startswith(">="): a = util.sizetoint(expr[2:]) return lambda x: x >= a elif expr.startswith(">"): a = util.sizetoint(expr[1:]) return lambda x: x > a else: a = util.sizetoint(expr) b = _sizetomax(expr) return lambda x: x >= a and x <= b @predicate('size(expression)', callexisting=True) def size(mctx, x): """File size matches the given expression. Examples: - size('1k') - files from 1024 to 2047 bytes - size('< 20k') - files less than 20480 bytes - size('>= .5MB') - files at least 524288 bytes - size('4k - 1MB') - files from 4096 bytes to 1048576 bytes """ m = sizematcher(x) return [f for f in mctx.existing() if m(mctx.ctx[f].size())] @predicate('encoding(name)', callexisting=True) def encoding(mctx, x): """File can be successfully decoded with the given character encoding. May not be useful for encodings other than ASCII and UTF-8. """ # i18n: "encoding" is a keyword enc = getstring(x, _("encoding requires an encoding name")) s = [] for f in mctx.existing(): d = mctx.ctx[f].data() try: d.decode(pycompat.sysstr(enc)) except LookupError: raise error.Abort(_("unknown encoding '%s'") % enc) except UnicodeDecodeError: continue s.append(f) return s @predicate('eol(style)', callexisting=True) def eol(mctx, x): """File contains newlines of the given style (dos, unix, mac). Binary files are excluded, files with mixed line endings match multiple styles. """ # i18n: "eol" is a keyword enc = getstring(x, _("eol requires a style name")) s = [] for f in mctx.existing(): fctx = mctx.ctx[f] if fctx.isbinary(): continue d = fctx.data() if (enc == 'dos' or enc == 'win') and '\r\n' in d: s.append(f) elif enc == 'unix' and re.search('(?<!\r)\n', d): s.append(f) elif enc == 'mac' and re.search('\r(?!\n)', d): s.append(f) return s @predicate('copied()') def copied(mctx, x): """File that is recorded as being copied. """ # i18n: "copied" is a keyword getargs(x, 0, 0, _("copied takes no arguments")) s = [] for f in mctx.subset: if f in mctx.ctx: p = mctx.ctx[f].parents() if p and p[0].path() != f: s.append(f) return s @predicate('revs(revs, pattern)') def revs(mctx, x): """Evaluate set in the specified revisions. If the revset match multiple revs, this will return file matching pattern in any of the revision. """ # i18n: "revs" is a keyword r, x = getargs(x, 2, 2, _("revs takes two arguments")) # i18n: "revs" is a keyword revspec = getstring(r, _("first argument to revs must be a revision")) repo = mctx.ctx.repo() revs = scmutil.revrange(repo, [revspec]) found = set() result = [] for r in revs: ctx = repo[r] for f in getset(mctx.switch(ctx, _buildstatus(ctx, x)), x): if f not in found: found.add(f) result.append(f) return result @predicate('status(base, rev, pattern)') def status(mctx, x): """Evaluate predicate using status change between ``base`` and ``rev``. Examples: - ``status(3, 7, added())`` - matches files added from "3" to "7" """ repo = mctx.ctx.repo() # i18n: "status" is a keyword b, r, x = getargs(x, 3, 3, _("status takes three arguments")) # i18n: "status" is a keyword baseerr = _("first argument to status must be a revision") baserevspec = getstring(b, baseerr) if not baserevspec: raise error.ParseError(baseerr) reverr = _("second argument to status must be a revision") revspec = getstring(r, reverr) if not revspec: raise error.ParseError(reverr) basectx, ctx = scmutil.revpair(repo, [baserevspec, revspec]) return getset(mctx.switch(ctx, _buildstatus(ctx, x, basectx=basectx)), x) @predicate('subrepo([pattern])') def subrepo(mctx, x): """Subrepositories whose paths match the given pattern. """ # i18n: "subrepo" is a keyword getargs(x, 0, 1, _("subrepo takes at most one argument")) ctx = mctx.ctx sstate = sorted(ctx.substate) if x: pat = getpattern(x, matchmod.allpatternkinds, # i18n: "subrepo" is a keyword _("subrepo requires a pattern or no arguments")) fast = not matchmod.patkind(pat) if fast: def m(s): return (s == pat) else: m = matchmod.match(ctx.repo().root, '', [pat], ctx=ctx) return [sub for sub in sstate if m(sub)] else: return [sub for sub in sstate] methods = { 'string': stringset, 'symbol': stringset, 'kindpat': kindpatset, 'and': andset, 'or': orset, 'minus': minusset, 'negate': negateset, 'list': listset, 'group': getset, 'not': notset, 'func': func, } class matchctx(object): def __init__(self, ctx, subset, status=None, badfn=None): self.ctx = ctx self.subset = subset self._status = status self._badfn = badfn self._existingenabled = False def status(self): return self._status def matcher(self, patterns): return self.ctx.match(patterns, badfn=self._badfn) def filter(self, files): return [f for f in files if f in self.subset] def existing(self): if not self._existingenabled: raise error.ProgrammingError('unexpected existing() invocation') if self._status is not None: removed = set(self._status[3]) unknown = set(self._status[4] + self._status[5]) else: removed = set() unknown = set() return (f for f in self.subset if (f in self.ctx and f not in removed) or f in unknown) def narrow(self, files): return matchctx(self.ctx, self.filter(files), self._status, self._badfn) def switch(self, ctx, status=None): subset = self.filter(_buildsubset(ctx, status)) return matchctx(ctx, subset, status, self._badfn) class fullmatchctx(matchctx): """A match context where any files in any revisions should be valid""" def __init__(self, ctx, status=None, badfn=None): subset = _buildsubset(ctx, status) super(fullmatchctx, self).__init__(ctx, subset, status, badfn) def switch(self, ctx, status=None): return fullmatchctx(ctx, status, self._badfn) # filesets using matchctx.switch() _switchcallers = [ 'revs', 'status', ] def _intree(funcs, tree): if isinstance(tree, tuple): if tree[0] == 'func' and tree[1][0] == 'symbol': if tree[1][1] in funcs: return True if tree[1][1] in _switchcallers: # arguments won't be evaluated in the current context return False for s in tree[1:]: if _intree(funcs, s): return True return False def _buildsubset(ctx, status): if status: subset = [] for c in status: subset.extend(c) return subset else: return list(ctx.walk(ctx.match([]))) def match(ctx, expr, badfn=None): """Create a matcher for a single fileset expression""" repo = ctx.repo() tree = parse(expr) fset = getset(fullmatchctx(ctx, _buildstatus(ctx, tree), badfn=badfn), tree) return matchmod.predicatematcher(repo.root, repo.getcwd(), fset.__contains__, predrepr='fileset', badfn=badfn) def _buildstatus(ctx, tree, basectx=None): # do we need status info? # temporaty boolean to simplify the next conditional purewdir = ctx.rev() is None and basectx is None if (_intree(_statuscallers, tree) or # Using matchctx.existing() on a workingctx requires us to check # for deleted files. (purewdir and _intree(_existingcallers, tree))): unknown = _intree(['unknown'], tree) ignored = _intree(['ignored'], tree) r = ctx.repo() if basectx is None: basectx = ctx.p1() return r.status(basectx, ctx, unknown=unknown, ignored=ignored, clean=True) else: return None def prettyformat(tree): return parser.prettyformat(tree, ('string', 'symbol')) def loadpredicate(ui, extname, registrarobj): """Load fileset predicates from specified registrarobj """ for name, func in registrarobj._table.iteritems(): symbols[name] = func if func._callstatus: _statuscallers.add(name) if func._callexisting: _existingcallers.add(name) # load built-in predicates explicitly to setup _statuscallers/_existingcallers loadpredicate(None, None, predicate) # tell hggettext to extract docstrings from these functions: i18nfunctions = symbols.values()