changeset 38689:ff5b6fca1082

fileset: rewrite predicates to return matcher not closed to subset (API) (BC) This makes fileset expression open to any input, so that we can just say "hg status 'set: not binary()'" to select text files including unknowns. With this and removal of subset computation, 'set:**' becomes as fast as 'glob:**'. Further optimization will probably be possible by narrowing the file tree to compute status for example. This also fixes 'subrepo()' to not ignore the current mctx.subset. .. bc:: The fileset expression may include untracked files by default. Use ``tracked()`` to explicitly filter out files not existing at the context revision.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 09 Jun 2018 19:55:10 +0900
parents 2570dca0f21c
children 5d9749c598f0
files hgext/lfs/__init__.py mercurial/fileset.py tests/test-fileset.t tests/test-lfs.t
diffstat 4 files changed, 120 insertions(+), 104 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/lfs/__init__.py	Tue Jul 10 23:49:48 2018 +0900
+++ b/hgext/lfs/__init__.py	Sat Jun 09 19:55:10 2018 +0900
@@ -362,8 +362,10 @@
     """File that uses LFS storage."""
     # i18n: "lfs" is a keyword
     fileset.getargs(x, 0, 0, _("lfs takes no arguments"))
-    return [f for f in mctx.subset
-            if wrapper.pointerfromctx(mctx.ctx, f, removed=True) is not None]
+    ctx = mctx.ctx
+    def lfsfilep(f):
+        return wrapper.pointerfromctx(ctx, f, removed=True) is not None
+    return mctx.predicate(lfsfilep, predrepr='<lfs>')
 
 @templatekeyword('lfs_files', requires={'ctx'})
 def lfsfiles(context, mapping):
--- a/mercurial/fileset.py	Tue Jul 10 23:49:48 2018 +0900
+++ b/mercurial/fileset.py	Sat Jun 09 19:55:10 2018 +0900
@@ -140,43 +140,41 @@
         raise error.ParseError(err)
     return l
 
-def getset(mctx, x):
+def getmatch(mctx, x):
     if not x:
         raise error.ParseError(_("missing argument"))
     return methods[x[0]](mctx, *x[1:])
 
-def stringset(mctx, x):
-    m = mctx.matcher([x])
-    return [f for f in mctx.subset if m(f)]
+def stringmatch(mctx, x):
+    return mctx.matcher([x])
 
-def kindpatset(mctx, x, y):
-    return stringset(mctx, _getkindpat(x, y, matchmod.allpatternkinds,
-                                       _("pattern must be a string")))
+def kindpatmatch(mctx, x, y):
+    return stringmatch(mctx, _getkindpat(x, y, matchmod.allpatternkinds,
+                                         _("pattern must be a string")))
 
-def andset(mctx, x, y):
-    xl = set(getset(mctx, x))
-    yl = getset(mctx, y)
-    return [f for f in yl if f in xl]
+def andmatch(mctx, x, y):
+    xm = getmatch(mctx, x)
+    ym = getmatch(mctx, y)
+    return matchmod.intersectmatchers(xm, ym)
 
-def orset(mctx, x, y):
-    # needs optimizing
-    xl = getset(mctx, x)
-    yl = getset(mctx, y)
-    return xl + [f for f in yl if f not in xl]
+def ormatch(mctx, x, y):
+    xm = getmatch(mctx, x)
+    ym = getmatch(mctx, y)
+    return matchmod.unionmatcher([xm, ym])
 
-def notset(mctx, x):
-    s = set(getset(mctx, x))
-    return [r for r in mctx.subset if r not in s]
+def notmatch(mctx, x):
+    m = getmatch(mctx, x)
+    return mctx.predicate(lambda f: not m(f), predrepr=('<not %r>', m))
 
-def minusset(mctx, x, y):
-    xl = getset(mctx, x)
-    yl = set(getset(mctx, y))
-    return [f for f in xl if f not in yl]
+def minusmatch(mctx, x, y):
+    xm = getmatch(mctx, x)
+    ym = getmatch(mctx, y)
+    return matchmod.differencematcher(xm, ym)
 
-def negateset(mctx, x):
+def negatematch(mctx, x):
     raise error.ParseError(_("can't use negate operator in this context"))
 
-def listset(mctx, a, b):
+def listmatch(mctx, x, y):
     raise error.ParseError(_("can't use a list in this context"),
                            hint=_('see hg help "filesets.x or y"'))
 
@@ -217,7 +215,7 @@
     # i18n: "modified" is a keyword
     getargs(x, 0, 0, _("modified takes no arguments"))
     s = set(mctx.status().modified)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='modified')
 
 @predicate('added()', callstatus=True)
 def added(mctx, x):
@@ -226,7 +224,7 @@
     # i18n: "added" is a keyword
     getargs(x, 0, 0, _("added takes no arguments"))
     s = set(mctx.status().added)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='added')
 
 @predicate('removed()', callstatus=True)
 def removed(mctx, x):
@@ -235,7 +233,7 @@
     # i18n: "removed" is a keyword
     getargs(x, 0, 0, _("removed takes no arguments"))
     s = set(mctx.status().removed)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='removed')
 
 @predicate('deleted()', callstatus=True)
 def deleted(mctx, x):
@@ -244,7 +242,7 @@
     # i18n: "deleted" is a keyword
     getargs(x, 0, 0, _("deleted takes no arguments"))
     s = set(mctx.status().deleted)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='deleted')
 
 @predicate('missing()', callstatus=True)
 def missing(mctx, x):
@@ -253,27 +251,23 @@
     # i18n: "missing" is a keyword
     getargs(x, 0, 0, _("missing takes no arguments"))
     s = set(mctx.status().deleted)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='deleted')
 
 @predicate('unknown()', callstatus=True)
 def unknown(mctx, x):
-    """File that is unknown according to :hg:`status`. These files will only be
-    considered if this predicate is used.
-    """
+    """File that is unknown according to :hg:`status`."""
     # i18n: "unknown" is a keyword
     getargs(x, 0, 0, _("unknown takes no arguments"))
     s = set(mctx.status().unknown)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='unknown')
 
 @predicate('ignored()', callstatus=True)
 def ignored(mctx, x):
-    """File that is ignored according to :hg:`status`. These files will only be
-    considered if this predicate is used.
-    """
+    """File that is ignored according to :hg:`status`."""
     # i18n: "ignored" is a keyword
     getargs(x, 0, 0, _("ignored takes no arguments"))
     s = set(mctx.status().ignored)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='ignored')
 
 @predicate('clean()', callstatus=True)
 def clean(mctx, x):
@@ -282,14 +276,14 @@
     # i18n: "clean" is a keyword
     getargs(x, 0, 0, _("clean takes no arguments"))
     s = set(mctx.status().clean)
-    return [f for f in mctx.subset if f in s]
+    return mctx.predicate(s.__contains__, predrepr='clean')
 
 @predicate('tracked()')
 def tracked(mctx, x):
     """File that is under Mercurial control."""
     # i18n: "tracked" is a keyword
     getargs(x, 0, 0, _("tracked takes no arguments"))
-    return [f for f in mctx.subset if f in mctx.ctx]
+    return mctx.predicate(mctx.ctx.__contains__, predrepr='tracked')
 
 @predicate('binary()', callexisting=True)
 def binary(mctx, x):
@@ -297,7 +291,8 @@
     """
     # i18n: "binary" is a keyword
     getargs(x, 0, 0, _("binary takes no arguments"))
-    return [f for f in mctx.existing() if mctx.ctx[f].isbinary()]
+    return mctx.fpredicate(lambda fctx: fctx.isbinary(),
+                           predrepr='binary', cache=True)
 
 @predicate('exec()', callexisting=True)
 def exec_(mctx, x):
@@ -305,7 +300,8 @@
     """
     # i18n: "exec" is a keyword
     getargs(x, 0, 0, _("exec takes no arguments"))
-    return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'x']
+    ctx = mctx.ctx
+    return mctx.predicate(lambda f: ctx.flags(f) == 'x', predrepr='exec')
 
 @predicate('symlink()', callexisting=True)
 def symlink(mctx, x):
@@ -313,7 +309,8 @@
     """
     # i18n: "symlink" is a keyword
     getargs(x, 0, 0, _("symlink takes no arguments"))
-    return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'l']
+    ctx = mctx.ctx
+    return mctx.predicate(lambda f: ctx.flags(f) == 'l', predrepr='symlink')
 
 @predicate('resolved()')
 def resolved(mctx, x):
@@ -322,9 +319,10 @@
     # i18n: "resolved" is a keyword
     getargs(x, 0, 0, _("resolved takes no arguments"))
     if mctx.ctx.rev() is not None:
-        return []
+        return mctx.never()
     ms = merge.mergestate.read(mctx.ctx.repo())
-    return [f for f in mctx.subset if f in ms and ms[f] == 'r']
+    return mctx.predicate(lambda f: f in ms and ms[f] == 'r',
+                          predrepr='resolved')
 
 @predicate('unresolved()')
 def unresolved(mctx, x):
@@ -333,9 +331,10 @@
     # i18n: "unresolved" is a keyword
     getargs(x, 0, 0, _("unresolved takes no arguments"))
     if mctx.ctx.rev() is not None:
-        return []
+        return mctx.never()
     ms = merge.mergestate.read(mctx.ctx.repo())
-    return [f for f in mctx.subset if f in ms and ms[f] == 'u']
+    return mctx.predicate(lambda f: f in ms and ms[f] == 'u',
+                          predrepr='unresolved')
 
 @predicate('hgignore()')
 def hgignore(mctx, x):
@@ -343,8 +342,7 @@
     """
     # i18n: "hgignore" is a keyword
     getargs(x, 0, 0, _("hgignore takes no arguments"))
-    ignore = mctx.ctx.repo().dirstate._ignore
-    return [f for f in mctx.subset if ignore(f)]
+    return mctx.ctx.repo().dirstate._ignore
 
 @predicate('portable()')
 def portable(mctx, x):
@@ -353,8 +351,8 @@
     """
     # i18n: "portable" is a keyword
     getargs(x, 0, 0, _("portable takes no arguments"))
-    checkwinfilename = util.checkwinfilename
-    return [f for f in mctx.subset if checkwinfilename(f) is None]
+    return mctx.predicate(lambda f: util.checkwinfilename(f) is None,
+                          predrepr='portable')
 
 @predicate('grep(regex)', callexisting=True)
 def grep(mctx, x):
@@ -366,7 +364,8 @@
     except re.error as e:
         raise error.ParseError(_('invalid match pattern: %s') %
                                stringutil.forcebytestr(e))
-    return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())]
+    return mctx.fpredicate(lambda fctx: r.search(fctx.data()),
+                           predrepr=('grep(%r)', r.pattern), cache=True)
 
 def _sizetomax(s):
     try:
@@ -421,7 +420,8 @@
     # i18n: "size" is a keyword
     expr = getstring(x, _("size requires an expression"))
     m = sizematcher(expr)
-    return [f for f in mctx.existing() if m(mctx.ctx[f].size())]
+    return mctx.fpredicate(lambda fctx: m(fctx.size()),
+                           predrepr=('size(%r)', expr), cache=True)
 
 @predicate('encoding(name)', callexisting=True)
 def encoding(mctx, x):
@@ -433,18 +433,17 @@
     # i18n: "encoding" is a keyword
     enc = getstring(x, _("encoding requires an encoding name"))
 
-    s = []
-    for f in mctx.existing():
-        d = mctx.ctx[f].data()
+    def encp(fctx):
+        d = fctx.data()
         try:
             d.decode(pycompat.sysstr(enc))
+            return True
         except LookupError:
             raise error.Abort(_("unknown encoding '%s'") % enc)
         except UnicodeDecodeError:
-            continue
-        s.append(f)
+            return False
 
-    return s
+    return mctx.fpredicate(encp, predrepr=('encoding(%r)', enc), cache=True)
 
 @predicate('eol(style)', callexisting=True)
 def eol(mctx, x):
@@ -456,19 +455,18 @@
     # i18n: "eol" is a keyword
     enc = getstring(x, _("eol requires a style name"))
 
-    s = []
-    for f in mctx.existing():
-        fctx = mctx.ctx[f]
+    def eolp(fctx):
         if fctx.isbinary():
-            continue
+            return False
         d = fctx.data()
         if (enc == 'dos' or enc == 'win') and '\r\n' in d:
-            s.append(f)
+            return True
         elif enc == 'unix' and re.search('(?<!\r)\n', d):
-            s.append(f)
+            return True
         elif enc == 'mac' and re.search('\r(?!\n)', d):
-            s.append(f)
-    return s
+            return True
+        return False
+    return mctx.fpredicate(eolp, predrepr=('eol(%r)', enc), cache=True)
 
 @predicate('copied()')
 def copied(mctx, x):
@@ -476,13 +474,10 @@
     """
     # i18n: "copied" is a keyword
     getargs(x, 0, 0, _("copied takes no arguments"))
-    s = []
-    for f in mctx.subset:
-        if f in mctx.ctx:
-            p = mctx.ctx[f].parents()
-            if p and p[0].path() != f:
-                s.append(f)
-    return s
+    def copiedp(fctx):
+        p = fctx.parents()
+        return p and p[0].path() != fctx.path()
+    return mctx.fpredicate(copiedp, predrepr='copied', cache=True)
 
 @predicate('revs(revs, pattern)')
 def revs(mctx, x):
@@ -496,15 +491,15 @@
     repo = mctx.ctx.repo()
     revs = scmutil.revrange(repo, [revspec])
 
-    found = set()
-    result = []
+    matchers = []
     for r in revs:
         ctx = repo[r]
-        for f in getset(mctx.switch(ctx, _buildstatus(ctx, x)), x):
-            if f not in found:
-                found.add(f)
-                result.append(f)
-    return result
+        matchers.append(getmatch(mctx.switch(ctx, _buildstatus(ctx, x)), x))
+    if not matchers:
+        return mctx.never()
+    if len(matchers) == 1:
+        return matchers[0]
+    return matchmod.unionmatcher(matchers)
 
 @predicate('status(base, rev, pattern)')
 def status(mctx, x):
@@ -526,7 +521,7 @@
     if not revspec:
         raise error.ParseError(reverr)
     basectx, ctx = scmutil.revpair(repo, [baserevspec, revspec])
-    return getset(mctx.switch(ctx, _buildstatus(ctx, x, basectx=basectx)), x)
+    return getmatch(mctx.switch(ctx, _buildstatus(ctx, x, basectx=basectx)), x)
 
 @predicate('subrepo([pattern])')
 def subrepo(mctx, x):
@@ -535,7 +530,7 @@
     # i18n: "subrepo" is a keyword
     getargs(x, 0, 1, _("subrepo takes at most one argument"))
     ctx = mctx.ctx
-    sstate = sorted(ctx.substate)
+    sstate = ctx.substate
     if x:
         pat = getpattern(x, matchmod.allpatternkinds,
                          # i18n: "subrepo" is a keyword
@@ -546,21 +541,22 @@
                 return (s == pat)
         else:
             m = matchmod.match(ctx.repo().root, '', [pat], ctx=ctx)
-        return [sub for sub in sstate if m(sub)]
+        return mctx.predicate(lambda f: f in sstate and m(f),
+                              predrepr=('subrepo(%r)', pat))
     else:
-        return [sub for sub in sstate]
+        return mctx.predicate(sstate.__contains__, predrepr='subrepo')
 
 methods = {
-    'string': stringset,
-    'symbol': stringset,
-    'kindpat': kindpatset,
-    'and': andset,
-    'or': orset,
-    'minus': minusset,
-    'negate': negateset,
-    'list': listset,
-    'group': getset,
-    'not': notset,
+    'string': stringmatch,
+    'symbol': stringmatch,
+    'kindpat': kindpatmatch,
+    'and': andmatch,
+    'or': ormatch,
+    'minus': minusmatch,
+    'negate': negatematch,
+    'list': listmatch,
+    'group': getmatch,
+    'not': notmatch,
     'func': func,
 }
 
@@ -680,12 +676,9 @@
 
 def match(ctx, expr, badfn=None):
     """Create a matcher for a single fileset expression"""
-    repo = ctx.repo()
     tree = parse(expr)
-    fset = getset(fullmatchctx(ctx, _buildstatus(ctx, tree), badfn=badfn), tree)
-    return matchmod.predicatematcher(repo.root, repo.getcwd(),
-                                     fset.__contains__,
-                                     predrepr='fileset', badfn=badfn)
+    mctx = fullmatchctx(ctx, _buildstatus(ctx, tree), badfn=badfn)
+    return getmatch(mctx, tree)
 
 def _buildstatus(ctx, tree, basectx=None):
     # do we need status info?
--- a/tests/test-fileset.t	Tue Jul 10 23:49:48 2018 +0900
+++ b/tests/test-fileset.t	Sat Jun 09 19:55:10 2018 +0900
@@ -142,8 +142,10 @@
   .hgignore
   c2
   $ fileset 'hgignore()'
+  .hgignore
   a2
   b2
+  c2
   $ fileset 'clean()'
   b1
   $ fileset 'copied()'
@@ -182,6 +184,7 @@
 
   >>> open('bin', 'wb').write(b'\0a') and None
   $ fileset 'binary()'
+  bin
   $ fileset 'binary() and unknown()'
   bin
   $ echo '^bin$' >> .hgignore
@@ -192,6 +195,7 @@
   bin
 
   $ fileset 'grep("b{1}")'
+  .hgignore
   b1
   b2
   c1
@@ -354,8 +358,12 @@
   $ fileset -r1 'unknown()'
   $ fileset -r1 'ignored()'
   $ fileset -r1 'hgignore()'
+  .hgignore
+  a2
   b2
   bin
+  c2
+  sub2
   $ fileset -r1 'binary()'
   bin
   $ fileset -r1 'size(1k)'
@@ -403,30 +411,42 @@
   dos
   mixed
   $ fileset 'eol(unix)'
+  .hgignore
   .hgsub
   .hgsubstate
   b1
   b2
+  b2.orig
   c1
+  c2
+  c3
+  con.xml
   mixed
+  unknown
   $ fileset 'eol(mac)'
   mac
 
 Test safety of 'encoding' on removed files
 
   $ fileset 'encoding("ascii")'
+  .hgignore
   .hgsub
   .hgsubstate
   1k
   2k
   b1
   b2
+  b2.orig
   b2link (symlink !)
   bin
   c1
+  c2
+  c3
+  con.xml
   dos
   mac
   mixed
+  unknown
 
 Test detection of unintentional 'matchctx.existing()' invocation
 
@@ -437,7 +457,8 @@
   > @filesetpredicate(b'existingcaller()', callexisting=False)
   > def existingcaller(mctx, x):
   >     # this 'mctx.existing()' invocation is unintentional
-  >     return [f for f in mctx.existing()]
+  >     existing = set(mctx.existing())
+  >     return mctx.predicate(existing.__contains__, cache=False)
   > EOF
 
   $ cat >> .hg/hgrc <<EOF
--- a/tests/test-lfs.t	Tue Jul 10 23:49:48 2018 +0900
+++ b/tests/test-lfs.t	Sat Jun 09 19:55:10 2018 +0900
@@ -523,8 +523,8 @@
 
   $ hg files --debug -r . 'set:eol("unix")' --config 'experimental.lfs.disableusercache=True'
   lfs: found c04b5bb1a5b2eb3e9cd4805420dba5a9d133da5b7adeeafb5474c4adae9faa80 in the local lfs store
+           2   b
   lfs: found 5dde896887f6754c9b15bfe3a441ae4806df2fde94001311e08bf110622e0bbe in the local lfs store
-           2   b
 
   $ hg files --debug -r . 'set:binary()' --config 'experimental.lfs.disableusercache=True'
            2   a