Mercurial > hg
changeset 38689:ff5b6fca1082
fileset: rewrite predicates to return matcher not closed to subset (API) (BC)
This makes fileset expressions open to any input, so that we can just say
"hg status 'set: not binary()'" to select text files including unknowns.
With this and the removal of subset computation, 'set:**' becomes as fast as
'glob:**'. Further optimization will probably be possible, for example by
narrowing the file tree for which status is computed.
This also fixes 'subrepo()' to not ignore the current mctx.subset.
.. bc::
A fileset expression may now include untracked files by default. Use
``tracked()`` to explicitly filter out files that do not exist at the context
revision.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 09 Jun 2018 19:55:10 +0900 |
parents | 2570dca0f21c |
children | 5d9749c598f0 |
files | hgext/lfs/__init__.py mercurial/fileset.py tests/test-fileset.t tests/test-lfs.t |
diffstat | 4 files changed, 120 insertions(+), 104 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/lfs/__init__.py Tue Jul 10 23:49:48 2018 +0900 +++ b/hgext/lfs/__init__.py Sat Jun 09 19:55:10 2018 +0900 @@ -362,8 +362,10 @@ """File that uses LFS storage.""" # i18n: "lfs" is a keyword fileset.getargs(x, 0, 0, _("lfs takes no arguments")) - return [f for f in mctx.subset - if wrapper.pointerfromctx(mctx.ctx, f, removed=True) is not None] + ctx = mctx.ctx + def lfsfilep(f): + return wrapper.pointerfromctx(ctx, f, removed=True) is not None + return mctx.predicate(lfsfilep, predrepr='<lfs>') @templatekeyword('lfs_files', requires={'ctx'}) def lfsfiles(context, mapping):
--- a/mercurial/fileset.py Tue Jul 10 23:49:48 2018 +0900 +++ b/mercurial/fileset.py Sat Jun 09 19:55:10 2018 +0900 @@ -140,43 +140,41 @@ raise error.ParseError(err) return l -def getset(mctx, x): +def getmatch(mctx, x): if not x: raise error.ParseError(_("missing argument")) return methods[x[0]](mctx, *x[1:]) -def stringset(mctx, x): - m = mctx.matcher([x]) - return [f for f in mctx.subset if m(f)] +def stringmatch(mctx, x): + return mctx.matcher([x]) -def kindpatset(mctx, x, y): - return stringset(mctx, _getkindpat(x, y, matchmod.allpatternkinds, - _("pattern must be a string"))) +def kindpatmatch(mctx, x, y): + return stringmatch(mctx, _getkindpat(x, y, matchmod.allpatternkinds, + _("pattern must be a string"))) -def andset(mctx, x, y): - xl = set(getset(mctx, x)) - yl = getset(mctx, y) - return [f for f in yl if f in xl] +def andmatch(mctx, x, y): + xm = getmatch(mctx, x) + ym = getmatch(mctx, y) + return matchmod.intersectmatchers(xm, ym) -def orset(mctx, x, y): - # needs optimizing - xl = getset(mctx, x) - yl = getset(mctx, y) - return xl + [f for f in yl if f not in xl] +def ormatch(mctx, x, y): + xm = getmatch(mctx, x) + ym = getmatch(mctx, y) + return matchmod.unionmatcher([xm, ym]) -def notset(mctx, x): - s = set(getset(mctx, x)) - return [r for r in mctx.subset if r not in s] +def notmatch(mctx, x): + m = getmatch(mctx, x) + return mctx.predicate(lambda f: not m(f), predrepr=('<not %r>', m)) -def minusset(mctx, x, y): - xl = getset(mctx, x) - yl = set(getset(mctx, y)) - return [f for f in xl if f not in yl] +def minusmatch(mctx, x, y): + xm = getmatch(mctx, x) + ym = getmatch(mctx, y) + return matchmod.differencematcher(xm, ym) -def negateset(mctx, x): +def negatematch(mctx, x): raise error.ParseError(_("can't use negate operator in this context")) -def listset(mctx, a, b): +def listmatch(mctx, x, y): raise error.ParseError(_("can't use a list in this context"), hint=_('see hg help "filesets.x or y"')) @@ -217,7 +215,7 @@ # i18n: "modified" is a keyword 
getargs(x, 0, 0, _("modified takes no arguments")) s = set(mctx.status().modified) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='modified') @predicate('added()', callstatus=True) def added(mctx, x): @@ -226,7 +224,7 @@ # i18n: "added" is a keyword getargs(x, 0, 0, _("added takes no arguments")) s = set(mctx.status().added) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='added') @predicate('removed()', callstatus=True) def removed(mctx, x): @@ -235,7 +233,7 @@ # i18n: "removed" is a keyword getargs(x, 0, 0, _("removed takes no arguments")) s = set(mctx.status().removed) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='removed') @predicate('deleted()', callstatus=True) def deleted(mctx, x): @@ -244,7 +242,7 @@ # i18n: "deleted" is a keyword getargs(x, 0, 0, _("deleted takes no arguments")) s = set(mctx.status().deleted) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='deleted') @predicate('missing()', callstatus=True) def missing(mctx, x): @@ -253,27 +251,23 @@ # i18n: "missing" is a keyword getargs(x, 0, 0, _("missing takes no arguments")) s = set(mctx.status().deleted) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='deleted') @predicate('unknown()', callstatus=True) def unknown(mctx, x): - """File that is unknown according to :hg:`status`. These files will only be - considered if this predicate is used. - """ + """File that is unknown according to :hg:`status`.""" # i18n: "unknown" is a keyword getargs(x, 0, 0, _("unknown takes no arguments")) s = set(mctx.status().unknown) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='unknown') @predicate('ignored()', callstatus=True) def ignored(mctx, x): - """File that is ignored according to :hg:`status`. 
These files will only be - considered if this predicate is used. - """ + """File that is ignored according to :hg:`status`.""" # i18n: "ignored" is a keyword getargs(x, 0, 0, _("ignored takes no arguments")) s = set(mctx.status().ignored) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='ignored') @predicate('clean()', callstatus=True) def clean(mctx, x): @@ -282,14 +276,14 @@ # i18n: "clean" is a keyword getargs(x, 0, 0, _("clean takes no arguments")) s = set(mctx.status().clean) - return [f for f in mctx.subset if f in s] + return mctx.predicate(s.__contains__, predrepr='clean') @predicate('tracked()') def tracked(mctx, x): """File that is under Mercurial control.""" # i18n: "tracked" is a keyword getargs(x, 0, 0, _("tracked takes no arguments")) - return [f for f in mctx.subset if f in mctx.ctx] + return mctx.predicate(mctx.ctx.__contains__, predrepr='tracked') @predicate('binary()', callexisting=True) def binary(mctx, x): @@ -297,7 +291,8 @@ """ # i18n: "binary" is a keyword getargs(x, 0, 0, _("binary takes no arguments")) - return [f for f in mctx.existing() if mctx.ctx[f].isbinary()] + return mctx.fpredicate(lambda fctx: fctx.isbinary(), + predrepr='binary', cache=True) @predicate('exec()', callexisting=True) def exec_(mctx, x): @@ -305,7 +300,8 @@ """ # i18n: "exec" is a keyword getargs(x, 0, 0, _("exec takes no arguments")) - return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'x'] + ctx = mctx.ctx + return mctx.predicate(lambda f: ctx.flags(f) == 'x', predrepr='exec') @predicate('symlink()', callexisting=True) def symlink(mctx, x): @@ -313,7 +309,8 @@ """ # i18n: "symlink" is a keyword getargs(x, 0, 0, _("symlink takes no arguments")) - return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'l'] + ctx = mctx.ctx + return mctx.predicate(lambda f: ctx.flags(f) == 'l', predrepr='symlink') @predicate('resolved()') def resolved(mctx, x): @@ -322,9 +319,10 @@ # i18n: "resolved" is a keyword getargs(x, 0, 0, 
_("resolved takes no arguments")) if mctx.ctx.rev() is not None: - return [] + return mctx.never() ms = merge.mergestate.read(mctx.ctx.repo()) - return [f for f in mctx.subset if f in ms and ms[f] == 'r'] + return mctx.predicate(lambda f: f in ms and ms[f] == 'r', + predrepr='resolved') @predicate('unresolved()') def unresolved(mctx, x): @@ -333,9 +331,10 @@ # i18n: "unresolved" is a keyword getargs(x, 0, 0, _("unresolved takes no arguments")) if mctx.ctx.rev() is not None: - return [] + return mctx.never() ms = merge.mergestate.read(mctx.ctx.repo()) - return [f for f in mctx.subset if f in ms and ms[f] == 'u'] + return mctx.predicate(lambda f: f in ms and ms[f] == 'u', + predrepr='unresolved') @predicate('hgignore()') def hgignore(mctx, x): @@ -343,8 +342,7 @@ """ # i18n: "hgignore" is a keyword getargs(x, 0, 0, _("hgignore takes no arguments")) - ignore = mctx.ctx.repo().dirstate._ignore - return [f for f in mctx.subset if ignore(f)] + return mctx.ctx.repo().dirstate._ignore @predicate('portable()') def portable(mctx, x): @@ -353,8 +351,8 @@ """ # i18n: "portable" is a keyword getargs(x, 0, 0, _("portable takes no arguments")) - checkwinfilename = util.checkwinfilename - return [f for f in mctx.subset if checkwinfilename(f) is None] + return mctx.predicate(lambda f: util.checkwinfilename(f) is None, + predrepr='portable') @predicate('grep(regex)', callexisting=True) def grep(mctx, x): @@ -366,7 +364,8 @@ except re.error as e: raise error.ParseError(_('invalid match pattern: %s') % stringutil.forcebytestr(e)) - return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())] + return mctx.fpredicate(lambda fctx: r.search(fctx.data()), + predrepr=('grep(%r)', r.pattern), cache=True) def _sizetomax(s): try: @@ -421,7 +420,8 @@ # i18n: "size" is a keyword expr = getstring(x, _("size requires an expression")) m = sizematcher(expr) - return [f for f in mctx.existing() if m(mctx.ctx[f].size())] + return mctx.fpredicate(lambda fctx: m(fctx.size()), + 
predrepr=('size(%r)', expr), cache=True) @predicate('encoding(name)', callexisting=True) def encoding(mctx, x): @@ -433,18 +433,17 @@ # i18n: "encoding" is a keyword enc = getstring(x, _("encoding requires an encoding name")) - s = [] - for f in mctx.existing(): - d = mctx.ctx[f].data() + def encp(fctx): + d = fctx.data() try: d.decode(pycompat.sysstr(enc)) + return True except LookupError: raise error.Abort(_("unknown encoding '%s'") % enc) except UnicodeDecodeError: - continue - s.append(f) + return False - return s + return mctx.fpredicate(encp, predrepr=('encoding(%r)', enc), cache=True) @predicate('eol(style)', callexisting=True) def eol(mctx, x): @@ -456,19 +455,18 @@ # i18n: "eol" is a keyword enc = getstring(x, _("eol requires a style name")) - s = [] - for f in mctx.existing(): - fctx = mctx.ctx[f] + def eolp(fctx): if fctx.isbinary(): - continue + return False d = fctx.data() if (enc == 'dos' or enc == 'win') and '\r\n' in d: - s.append(f) + return True elif enc == 'unix' and re.search('(?<!\r)\n', d): - s.append(f) + return True elif enc == 'mac' and re.search('\r(?!\n)', d): - s.append(f) - return s + return True + return False + return mctx.fpredicate(eolp, predrepr=('eol(%r)', enc), cache=True) @predicate('copied()') def copied(mctx, x): @@ -476,13 +474,10 @@ """ # i18n: "copied" is a keyword getargs(x, 0, 0, _("copied takes no arguments")) - s = [] - for f in mctx.subset: - if f in mctx.ctx: - p = mctx.ctx[f].parents() - if p and p[0].path() != f: - s.append(f) - return s + def copiedp(fctx): + p = fctx.parents() + return p and p[0].path() != fctx.path() + return mctx.fpredicate(copiedp, predrepr='copied', cache=True) @predicate('revs(revs, pattern)') def revs(mctx, x): @@ -496,15 +491,15 @@ repo = mctx.ctx.repo() revs = scmutil.revrange(repo, [revspec]) - found = set() - result = [] + matchers = [] for r in revs: ctx = repo[r] - for f in getset(mctx.switch(ctx, _buildstatus(ctx, x)), x): - if f not in found: - found.add(f) - result.append(f) - 
return result + matchers.append(getmatch(mctx.switch(ctx, _buildstatus(ctx, x)), x)) + if not matchers: + return mctx.never() + if len(matchers) == 1: + return matchers[0] + return matchmod.unionmatcher(matchers) @predicate('status(base, rev, pattern)') def status(mctx, x): @@ -526,7 +521,7 @@ if not revspec: raise error.ParseError(reverr) basectx, ctx = scmutil.revpair(repo, [baserevspec, revspec]) - return getset(mctx.switch(ctx, _buildstatus(ctx, x, basectx=basectx)), x) + return getmatch(mctx.switch(ctx, _buildstatus(ctx, x, basectx=basectx)), x) @predicate('subrepo([pattern])') def subrepo(mctx, x): @@ -535,7 +530,7 @@ # i18n: "subrepo" is a keyword getargs(x, 0, 1, _("subrepo takes at most one argument")) ctx = mctx.ctx - sstate = sorted(ctx.substate) + sstate = ctx.substate if x: pat = getpattern(x, matchmod.allpatternkinds, # i18n: "subrepo" is a keyword @@ -546,21 +541,22 @@ return (s == pat) else: m = matchmod.match(ctx.repo().root, '', [pat], ctx=ctx) - return [sub for sub in sstate if m(sub)] + return mctx.predicate(lambda f: f in sstate and m(f), + predrepr=('subrepo(%r)', pat)) else: - return [sub for sub in sstate] + return mctx.predicate(sstate.__contains__, predrepr='subrepo') methods = { - 'string': stringset, - 'symbol': stringset, - 'kindpat': kindpatset, - 'and': andset, - 'or': orset, - 'minus': minusset, - 'negate': negateset, - 'list': listset, - 'group': getset, - 'not': notset, + 'string': stringmatch, + 'symbol': stringmatch, + 'kindpat': kindpatmatch, + 'and': andmatch, + 'or': ormatch, + 'minus': minusmatch, + 'negate': negatematch, + 'list': listmatch, + 'group': getmatch, + 'not': notmatch, 'func': func, } @@ -680,12 +676,9 @@ def match(ctx, expr, badfn=None): """Create a matcher for a single fileset expression""" - repo = ctx.repo() tree = parse(expr) - fset = getset(fullmatchctx(ctx, _buildstatus(ctx, tree), badfn=badfn), tree) - return matchmod.predicatematcher(repo.root, repo.getcwd(), - fset.__contains__, - predrepr='fileset', 
badfn=badfn) + mctx = fullmatchctx(ctx, _buildstatus(ctx, tree), badfn=badfn) + return getmatch(mctx, tree) def _buildstatus(ctx, tree, basectx=None): # do we need status info?
--- a/tests/test-fileset.t Tue Jul 10 23:49:48 2018 +0900 +++ b/tests/test-fileset.t Sat Jun 09 19:55:10 2018 +0900 @@ -142,8 +142,10 @@ .hgignore c2 $ fileset 'hgignore()' + .hgignore a2 b2 + c2 $ fileset 'clean()' b1 $ fileset 'copied()' @@ -182,6 +184,7 @@ >>> open('bin', 'wb').write(b'\0a') and None $ fileset 'binary()' + bin $ fileset 'binary() and unknown()' bin $ echo '^bin$' >> .hgignore @@ -192,6 +195,7 @@ bin $ fileset 'grep("b{1}")' + .hgignore b1 b2 c1 @@ -354,8 +358,12 @@ $ fileset -r1 'unknown()' $ fileset -r1 'ignored()' $ fileset -r1 'hgignore()' + .hgignore + a2 b2 bin + c2 + sub2 $ fileset -r1 'binary()' bin $ fileset -r1 'size(1k)' @@ -403,30 +411,42 @@ dos mixed $ fileset 'eol(unix)' + .hgignore .hgsub .hgsubstate b1 b2 + b2.orig c1 + c2 + c3 + con.xml mixed + unknown $ fileset 'eol(mac)' mac Test safety of 'encoding' on removed files $ fileset 'encoding("ascii")' + .hgignore .hgsub .hgsubstate 1k 2k b1 b2 + b2.orig b2link (symlink !) bin c1 + c2 + c3 + con.xml dos mac mixed + unknown Test detection of unintentional 'matchctx.existing()' invocation @@ -437,7 +457,8 @@ > @filesetpredicate(b'existingcaller()', callexisting=False) > def existingcaller(mctx, x): > # this 'mctx.existing()' invocation is unintentional - > return [f for f in mctx.existing()] + > existing = set(mctx.existing()) + > return mctx.predicate(existing.__contains__, cache=False) > EOF $ cat >> .hg/hgrc <<EOF
--- a/tests/test-lfs.t Tue Jul 10 23:49:48 2018 +0900 +++ b/tests/test-lfs.t Sat Jun 09 19:55:10 2018 +0900 @@ -523,8 +523,8 @@ $ hg files --debug -r . 'set:eol("unix")' --config 'experimental.lfs.disableusercache=True' lfs: found c04b5bb1a5b2eb3e9cd4805420dba5a9d133da5b7adeeafb5474c4adae9faa80 in the local lfs store + 2 b lfs: found 5dde896887f6754c9b15bfe3a441ae4806df2fde94001311e08bf110622e0bbe in the local lfs store - 2 b $ hg files --debug -r . 'set:binary()' --config 'experimental.lfs.disableusercache=True' 2 a