--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/testparseutil.py Thu Aug 23 12:25:54 2018 +0900
@@ -0,0 +1,630 @@
+# testparseutil.py - utilities to parse test script for check tools
+#
+# Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import, print_function
+
+import abc
+import re
+import sys
+
+####################
+# for Python3 compatibility (mostly taken from mercurial/pycompat.py)
+
+# True if running on Python 3 or later
+ispy3 = sys.version_info[0] > 2
+
+def identity(a):
+    """Return the given argument as it is"""
+    return a
+
+def _rapply(f, xs):
+    """Apply "f" to each leaf value of "xs" recursively
+
+    list, set, tuple and dict are rebuilt with the same type, and "f"
+    is applied to their elements (for dict, to both keys and values).
+    None is returned as it is, and any other value is passed to "f".
+    """
+    if xs is None:
+        # assume None means non-value of optional data
+        return None
+    if isinstance(xs, dict):
+        return type(xs)((_rapply(f, key), _rapply(f, value))
+                        for key, value in xs.items())
+    if isinstance(xs, (list, set, tuple)):
+        return type(xs)(_rapply(f, item) for item in xs)
+    return f(xs)
+
+def rapply(f, xs):
+    """Apply "f" to each leaf value of "xs" recursively
+
+    If "f" is identity, "xs" itself is returned without any traversal
+    (fast path mainly for py2).
+    """
+    return xs if f is identity else _rapply(f, xs)
+
+if not ispy3:
+    # Python 2: std streams, str and open are used as they are
+    stdin = sys.stdin
+    stdout = sys.stdout
+    stderr = sys.stderr
+
+    bytestr = str
+    sysstr = identity
+
+    opentext = open
+else:
+    import builtins
+
+    # TODO: .buffer might not exist if std streams were replaced; we'll need
+    # a silly wrapper to make a bytes stream backed by a unicode one.
+    stdin = sys.stdin.buffer
+    stdout = sys.stdout.buffer
+    stderr = sys.stderr.buffer
+
+    def bytestr(s):
+        """Encode "s" as latin1 (tiny version of pycompat.bytestr)"""
+        return s.encode('latin1')
+
+    def sysstr(s):
+        """Return "s" as str, decoding it as latin-1 if it is not str yet
+        """
+        if not isinstance(s, builtins.str):
+            return s.decode(u'latin-1')
+        return s
+
+    def opentext(f):
+        """Open file "f" for reading in binary mode"""
+        return open(f, 'rb')
+
+def b2s(x):
+    """Convert BYTES elements in "x" to SYSSTR recursively"""
+    return rapply(sysstr, x)
+
+def writeout(data):
+    """Write "data" in BYTES into stdout"""
+    stdout.write(data)
+
+def writeerr(data):
+    """Write "data" in BYTES into stderr"""
+    stderr.write(data)
+
+####################
+
+class embeddedmatcher(abc.ABCMeta('_embeddedmatcherbase', (object,), {})):
+    """Base class to detect embedded code fragments in *.t test script
+
+    The base class is created by calling abc.ABCMeta directly, because
+    the "__metaclass__" class attribute takes effect only on Python 2:
+    with it, "@abc.abstractmethod" is silently not enforced on Python 3.
+    Calling the metaclass works on both, and makes instantiation of a
+    subclass fail with TypeError, unless it overrides all methods below.
+    """
+
+    def __init__(self, desc):
+        # "desc" is embedded into error messages by embedded()
+        self.desc = desc
+
+    @abc.abstractmethod
+    def startsat(self, line):
+        """Examine whether embedded code starts at line
+
+        This can return arbitrary object, and it is used as 'ctx' for
+        subsequent method invocations. Any false value means that
+        embedded code doesn't start at line.
+        """
+
+    @abc.abstractmethod
+    def endsat(self, ctx, line):
+        """Examine whether embedded code ends at line"""
+
+    @abc.abstractmethod
+    def isinside(self, ctx, line):
+        """Examine whether line is inside embedded code, if not yet endsat
+        """
+
+    @abc.abstractmethod
+    def ignores(self, ctx):
+        """Examine whether detected embedded code should be ignored"""
+
+    @abc.abstractmethod
+    def filename(self, ctx):
+        """Return filename of embedded code
+
+        If filename isn't specified for embedded code explicitly, this
+        returns None.
+        """
+
+    @abc.abstractmethod
+    def codeatstart(self, ctx, line):
+        """Return actual code at the start line of embedded code
+
+        This might return None, if the start line doesn't contain
+        actual code.
+        """
+
+    @abc.abstractmethod
+    def codeatend(self, ctx, line):
+        """Return actual code at the end line of embedded code
+
+        This might return None, if the end line doesn't contain actual
+        code.
+        """
+
+    @abc.abstractmethod
+    def codeinside(self, ctx, line):
+        """Return actual code at line inside embedded code"""
+
+def embedded(basefile, lines, errors, matchers):
+    """pick embedded code fragments up from given lines
+
+    This is the common parsing loop: each line is examined with the
+    specified matchers, in order to find where embedded code starts
+    and ends.
+
+    :basefile: a name of a file, from which lines to be parsed come.
+    :lines: to be parsed (might be a value returned by "open(basefile)")
+    :errors: an array, into which messages for detected error are stored
+    :matchers: an array of embeddedmatcher objects
+
+    This function yields '(filename, starts, ends, code)' tuple.
+
+    :filename: a name of embedded code, if it is explicitly specified
+               (e.g. "foobar" of "cat >> foobar <<EOF").
+               Otherwise, this is None
+    :starts: line number (1-origin), at which embedded code starts (inclusive)
+    :ends: line number (1-origin), at which embedded code ends (exclusive)
+    :code: extracted embedded code, which is single-stringified
+
+    >>> class ambigmatcher(object):
+    ...     # mock matcher class to examine implementation of
+    ...     # "ambiguous matching" corner case
+    ...     def __init__(self, desc, matchfunc):
+    ...         self.desc = desc
+    ...         self.matchfunc = matchfunc
+    ...     def startsat(self, line):
+    ...         return self.matchfunc(line)
+    >>> ambig1 = ambigmatcher(b'ambiguous #1',
+    ...                       lambda l: l.startswith(b' $ cat '))
+    >>> ambig2 = ambigmatcher(b'ambiguous #2',
+    ...                       lambda l: l.endswith(b'<< EOF\\n'))
+    >>> lines = [b' $ cat > foo.py << EOF\\n']
+    >>> errors = []
+    >>> matchers = [ambig1, ambig2]
+    >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
+    []
+    >>> b2s(errors)
+    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
+
+    """
+    current = None # matcher of the fragment being extracted, if any
+    ctx = filename = chunks = startline = None # for pyflakes
+
+    for lineno, line in enumerate(lines, 1):
+        if not line.endswith(b'\n'):
+            line += b'\n' # to normalize EOF line
+
+        if current: # now, inside embedded code
+            if current.endsat(ctx, line):
+                tail = current.codeatend(ctx, line)
+                if tail is not None:
+                    chunks.append(tail)
+                if not current.ignores(ctx):
+                    yield (filename, startline, lineno, b''.join(chunks))
+                current = None
+                # fall through, because line might start next fragment
+            elif current.isinside(ctx, line):
+                chunks.append(current.codeinside(ctx, line))
+                continue
+            else:
+                # this is an error of basefile
+                # (if matchers are implemented correctly)
+                errors.append(b'%s:%d: unexpected line for "%s"'
+                              % (basefile, lineno, current.desc))
+                # give up extracting by 'current', because appearance
+                # of unexpected line might mean that expected
+                # end-of-embedded-code line might never appear
+                current = None
+                # fall through, because line might start next fragment
+
+        # examine whether current line starts embedded code or not
+        assert not current
+
+        candidates = []
+        for m in matchers:
+            mctx = m.startsat(line)
+            if mctx:
+                candidates.append((m, mctx))
+
+        if not candidates:
+            continue
+
+        if len(candidates) > 1:
+            # this is an error of matchers, maybe
+            descs = b', '.join([b'"%s"' % m.desc for m, c in candidates])
+            errors.append(b'%s:%d: ambiguous line for %s' %
+                          (basefile, lineno, descs))
+            # omit extracting embedded code, because choosing
+            # arbitrary matcher from matched ones might fail to
+            # detect the end of embedded code as expected.
+            continue
+
+        current, ctx = candidates[0]
+        filename = current.filename(ctx)
+        chunks = []
+        head = current.codeatstart(ctx, line)
+        if head is None:
+            # actual code starts at the next line
+            startline = lineno + 1
+        else:
+            chunks.append(head)
+            startline = lineno
+
+    if current:
+        # examine whether EOF ends embedded code, because embedded
+        # code isn't yet ended explicitly
+        if current.endsat(ctx, b'\n'):
+            tail = current.codeatend(ctx, b'\n')
+            if tail is not None:
+                chunks.append(tail)
+            if not current.ignores(ctx):
+                yield (filename, startline, lineno + 1, b''.join(chunks))
+        else:
+            # this is an error of basefile
+            # (if matchers are implemented correctly)
+            errors.append(b'%s:%d: unexpected end of file for "%s"'
+                          % (basefile, lineno, current.desc))
+
+# heredoc limit mark to ignore embedded code at check-code.py or so
+heredocignorelimit = b'NO_CHECK_EOF'
+
+# the pattern to match against cases below, and to return a limit mark
+# string as 'limit' group
+#
+# - << LIMITMARK
+# - << "LIMITMARK"
+# - << 'LIMITMARK'
+heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
+
+class fileheredocmatcher(embeddedmatcher):
+    """Detect "cat > FILE << LIMIT" style embedded code
+
+    'ctx' returned by startsat() is
+    '(filename, END-LINE-OF-EMBEDDED-CODE)' tuple.
+
+    >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py')
+    >>> b2s(matcher.startsat(b' $ cat > file.py << EOF\\n'))
+    ('file.py', ' > EOF\\n')
+    >>> b2s(matcher.startsat(b' $ cat >>file.py <<EOF\\n'))
+    ('file.py', ' > EOF\\n')
+    >>> b2s(matcher.startsat(b' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
+    ('any file.py', ' > EOF\\n')
+    >>> b2s(matcher.startsat(b" $ cat > file.py << 'ANYLIMIT'\\n"))
+    ('file.py', ' > ANYLIMIT\\n')
+    >>> b2s(matcher.startsat(b' $ cat<<ANYLIMIT>"file.py"\\n'))
+    ('file.py', ' > ANYLIMIT\\n')
+    >>> start = b' $ cat > file.py << EOF\\n'
+    >>> ctx = matcher.startsat(start)
+    >>> matcher.codeatstart(ctx, start)
+    >>> b2s(matcher.filename(ctx))
+    'file.py'
+    >>> matcher.ignores(ctx)
+    False
+    >>> inside = b' > foo = 1\\n'
+    >>> matcher.endsat(ctx, inside)
+    False
+    >>> matcher.isinside(ctx, inside)
+    True
+    >>> b2s(matcher.codeinside(ctx, inside))
+    'foo = 1\\n'
+    >>> end = b' > EOF\\n'
+    >>> matcher.endsat(ctx, end)
+    True
+    >>> matcher.codeatend(ctx, end)
+    >>> matcher.endsat(ctx, b' > EOFEOF\\n')
+    False
+    >>> ctx = matcher.startsat(b' $ cat > file.py << NO_CHECK_EOF\\n')
+    >>> matcher.ignores(ctx)
+    True
+    """
+    _prefix = b' > '
+
+    def __init__(self, desc, namepat):
+        super(fileheredocmatcher, self).__init__(desc)
+
+        # the pattern to match against redirection below (and ">>"
+        # variants), and to return a target filename string as 'name'
+        # group
+        #
+        # - > NAMEPAT
+        # - > "NAMEPAT"
+        # - > 'NAMEPAT'
+        filepat = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
+                   % namepat)
+        catpat = br' \$ \s*cat'
+        self._fileres = [
+            # "cat > NAME << LIMIT" case
+            re.compile(catpat + filepat + heredoclimitpat),
+            # "cat << LIMIT > NAME" case
+            re.compile(catpat + heredoclimitpat + filepat),
+        ]
+
+    def startsat(self, line):
+        # the first matching pattern decides ctx
+        for filere in self._fileres:
+            found = filere.match(line)
+            if found is not None:
+                name, limit = found.group('name', 'limit')
+                return (name, b' > %s\n' % limit)
+        return None
+
+    def endsat(self, ctx, line):
+        # embedded code ends at the line equal to the limit mark line
+        return line == ctx[1]
+
+    def isinside(self, ctx, line):
+        return line.startswith(self._prefix)
+
+    def ignores(self, ctx):
+        # ignored, if the limit mark is heredocignorelimit
+        return ctx[1] == b' > %s\n' % heredocignorelimit
+
+    def filename(self, ctx):
+        return ctx[0]
+
+    def codeatstart(self, ctx, line):
+        return None # no embedded code at start line
+
+    def codeatend(self, ctx, line):
+        return None # no embedded code at end line
+
+    def codeinside(self, ctx, line):
+        skip = len(self._prefix)
+        return line[skip:] # strip prefix
+
+####
+# for embedded python script
+
+class pydoctestmatcher(embeddedmatcher):
+    """Detect ">>> code" style embedded python code
+
+    'ctx' returned by startsat() is just "True".
+
+    >>> matcher = pydoctestmatcher()
+    >>> startline = b' >>> foo = 1\\n'
+    >>> matcher.startsat(startline)
+    True
+    >>> matcher.startsat(b' ... foo = 1\\n')
+    False
+    >>> ctx = matcher.startsat(startline)
+    >>> matcher.filename(ctx)
+    >>> matcher.ignores(ctx)
+    False
+    >>> b2s(matcher.codeatstart(ctx, startline))
+    'foo = 1\\n'
+    >>> inside = b' >>> foo = 1\\n'
+    >>> matcher.endsat(ctx, inside)
+    False
+    >>> matcher.isinside(ctx, inside)
+    True
+    >>> b2s(matcher.codeinside(ctx, inside))
+    'foo = 1\\n'
+    >>> inside = b' ... foo = 1\\n'
+    >>> matcher.endsat(ctx, inside)
+    False
+    >>> matcher.isinside(ctx, inside)
+    True
+    >>> b2s(matcher.codeinside(ctx, inside))
+    'foo = 1\\n'
+    >>> inside = b' expected output\\n'
+    >>> matcher.endsat(ctx, inside)
+    False
+    >>> matcher.isinside(ctx, inside)
+    True
+    >>> b2s(matcher.codeinside(ctx, inside))
+    '\\n'
+    >>> inside = b' \\n'
+    >>> matcher.endsat(ctx, inside)
+    False
+    >>> matcher.isinside(ctx, inside)
+    True
+    >>> b2s(matcher.codeinside(ctx, inside))
+    '\\n'
+    >>> end = b' $ foo bar\\n'
+    >>> matcher.endsat(ctx, end)
+    True
+    >>> matcher.codeatend(ctx, end)
+    >>> end = b'\\n'
+    >>> matcher.endsat(ctx, end)
+    True
+    >>> matcher.codeatend(ctx, end)
+    """
+    _prefix = b' >>> '
+    _prefixre = re.compile(br' (>>>|\.\.\.) ')
+
+    # If a line matches against not _prefixre but _outputre, that line
+    # is "an expected output line" (= not a part of code fragment).
+    #
+    # Strictly speaking, a line matching against "(#if|#else|#endif)"
+    # is also treated similarly in "inline python code" semantics by
+    # run-tests.py. But "directive line inside inline python code"
+    # should be rejected by Mercurial reviewers. Therefore, this
+    # regexp does not match against such directive lines.
+    _outputre = re.compile(br' $| [^$]')
+
+    def __init__(self):
+        super(pydoctestmatcher, self).__init__(b"doctest style python code")
+
+    def startsat(self, line):
+        return line.startswith(self._prefix)
+
+    def endsat(self, ctx, line):
+        # ended, if line is neither a code line nor an expected output line
+        return (not self._prefixre.match(line)
+                and not self._outputre.match(line))
+
+    def isinside(self, ctx, line):
+        return True # always true, if not yet ended
+
+    def ignores(self, ctx):
+        return False # should be checked always
+
+    def filename(self, ctx):
+        return None # no filename
+
+    def codeatstart(self, ctx, line):
+        skip = len(self._prefix)
+        return line[skip:] # strip prefix ' >>> '/' ... '
+
+    def codeatend(self, ctx, line):
+        return None # no embedded code at end line
+
+    def codeinside(self, ctx, line):
+        if not self._prefixre.match(line):
+            # an expected output line is treated as an empty line
+            return b'\n'
+        return line[len(self._prefix):] # strip prefix ' >>> '/' ... '
+
+class pyheredocmatcher(embeddedmatcher):
+    """Detect "python << LIMIT" style embedded python code
+
+    'ctx' returned by startsat() is END-LINE-OF-EMBEDDED-CODE.
+
+    >>> matcher = pyheredocmatcher()
+    >>> b2s(matcher.startsat(b' $ python << EOF\\n'))
+    ' > EOF\\n'
+    >>> b2s(matcher.startsat(b' $ $PYTHON <<EOF\\n'))
+    ' > EOF\\n'
+    >>> b2s(matcher.startsat(b' $ "$PYTHON"<< "EOF"\\n'))
+    ' > EOF\\n'
+    >>> b2s(matcher.startsat(b" $ $PYTHON << 'ANYLIMIT'\\n"))
+    ' > ANYLIMIT\\n'
+    >>> matcher.startsat(b' $ "$PYTHON" < EOF\\n')
+    >>> start = b' $ python << EOF\\n'
+    >>> ctx = matcher.startsat(start)
+    >>> matcher.codeatstart(ctx, start)
+    >>> matcher.filename(ctx)
+    >>> matcher.ignores(ctx)
+    False
+    >>> inside = b' > foo = 1\\n'
+    >>> matcher.endsat(ctx, inside)
+    False
+    >>> matcher.isinside(ctx, inside)
+    True
+    >>> b2s(matcher.codeinside(ctx, inside))
+    'foo = 1\\n'
+    >>> end = b' > EOF\\n'
+    >>> matcher.endsat(ctx, end)
+    True
+    >>> matcher.codeatend(ctx, end)
+    >>> matcher.endsat(ctx, b' > EOFEOF\\n')
+    False
+    >>> ctx = matcher.startsat(b' $ python << NO_CHECK_EOF\\n')
+    >>> matcher.ignores(ctx)
+    True
+    """
+    _prefix = b' > '
+
+    _startre = re.compile(br' \$ (\$PYTHON|"\$PYTHON"|python).*' +
+                          heredoclimitpat)
+
+    def __init__(self):
+        super(pyheredocmatcher, self).__init__(b"heredoc python invocation")
+
+    def startsat(self, line):
+        found = self._startre.match(line)
+        if found is None:
+            return None
+        return b' > %s\n' % found.group('limit')
+
+    def endsat(self, ctx, line):
+        # embedded code ends at the line equal to the limit mark line
+        return line == ctx
+
+    def isinside(self, ctx, line):
+        return line.startswith(self._prefix)
+
+    def ignores(self, ctx):
+        # ignored, if the limit mark is heredocignorelimit
+        return ctx == b' > %s\n' % heredocignorelimit
+
+    def filename(self, ctx):
+        return None # no filename
+
+    def codeatstart(self, ctx, line):
+        return None # no embedded code at start line
+
+    def codeatend(self, ctx, line):
+        return None # no embedded code at end line
+
+    def codeinside(self, ctx, line):
+        skip = len(self._prefix)
+        return line[skip:] # strip prefix
+
+# matchers examined by pyembedded(), to detect embedded python code
+_pymatchers = [
+    pydoctestmatcher(),
+    pyheredocmatcher(),
+    # '[^<]+' is used instead of '\S+', so that paths including
+    # whitespaces are matched, too
+    fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
+]
+
+def pyembedded(basefile, lines, errors):
+    """Pick embedded python code fragments up from given lines
+
+    This is embedded() with the matchers for python code.
+    """
+    return embedded(basefile, lines, errors, matchers=_pymatchers)
+
+####
+# for embedded shell script
+
+# matchers examined by shembedded(), to detect embedded shell script
+_shmatchers = [
+    # '[^<]+' is used instead of '\S+', so that paths including
+    # whitespaces are matched, too
+    fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
+]
+
+def shembedded(basefile, lines, errors):
+    """Pick embedded shell script fragments up from given lines
+
+    This is embedded() with the matchers for shell script.
+    """
+    return embedded(basefile, lines, errors, matchers=_shmatchers)
+
+####
+# for embedded hgrc configuration
+
+# matchers examined by hgrcembedded(), to detect embedded hgrc
+# configuration
+_hgrcmatchers = [
+    # '[^/<]+' is used instead of '\S+' for each path component, so
+    # that paths including whitespaces are matched, too
+    fileheredocmatcher(b'heredoc hgrc file',
+                       br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
+]
+
+def hgrcembedded(basefile, lines, errors):
+    """Pick embedded hgrc configuration fragments up from given lines
+
+    This is embedded() with the matchers for hgrc configuration.
+    """
+    return embedded(basefile, lines, errors, matchers=_hgrcmatchers)
+
+####
+
+if __name__ == "__main__":
+    import optparse
+
+    def showembedded(basefile, lines, embeddedfunc, opts):
+        """Write start/end lines of embedded code in "lines" into stdout
+
+        Detected errors are written into stderr. This returns the
+        number of detected errors.
+        """
+        errors = []
+        for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
+            if not name:
+                name = b'<anonymous>'
+            writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
+            if opts.verbose and code:
+                writeout(b" |%s\n" % b"\n |".join(code.splitlines()))
+            writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
+        for e in errors:
+            writeerr(b"%s\n" % e)
+        return len(errors)
+
+    def applyembedded(args, embeddedfunc, opts):
+        """Apply showembedded() on each files in "args", or on stdin
+
+        This returns 1 if any error is detected. Otherwise, 0.
+        """
+        ret = 0
+        if args:
+            for f in args:
+                with opentext(f) as fp:
+                    if showembedded(bytestr(f), fp, embeddedfunc, opts):
+                        ret = 1
+        else:
+            lines = stdin.readlines()
+            if showembedded(b'<stdin>', lines, embeddedfunc, opts):
+                ret = 1
+        return ret
+
+    # command name => (description, function) table
+    commands = {}
+    def command(name, desc):
+        """Decorator to register a function as command "name"
+        """
+        def wrap(func):
+            commands[name] = (desc, func)
+            # return func, in order not to rebind decorated name to None
+            return func
+        return wrap
+
+    @command("pyembedded", "detect embedded python script")
+    def pyembeddedcmd(args, opts):
+        return applyembedded(args, pyembedded, opts)
+
+    @command("shembedded", "detect embedded shell script")
+    def shembeddedcmd(args, opts):
+        return applyembedded(args, shembedded, opts)
+
+    @command("hgrcembedded", "detect embedded hgrc configuration")
+    def hgrcembeddedcmd(args, opts):
+        return applyembedded(args, hgrcembedded, opts)
+
+    # sort by command name, because order of dict items might be
+    # arbitrary (e.g. on Python 2), and it makes help output unstable
+    availablecommands = "\n".join([" - %s: %s" % (key, commands[key][0])
+                                   for key in sorted(commands)])
+
+    parser = optparse.OptionParser("""%prog COMMAND [file ...]
+
+Pick up embedded code fragments from given file(s) or stdin, and list
+up start/end lines of them in standard compiler format
+("FILENAME:LINENO:").
+
+Available commands are:
+""" + availablecommands + """
+""")
+    parser.add_option("-v", "--verbose",
+                      help="enable additional output (e.g. actual code)",
+                      action="store_true")
+    (opts, args) = parser.parse_args()
+
+    # the first argument should be one of registered command names
+    if not args or args[0] not in commands:
+        parser.print_help()
+        sys.exit(255)
+
+    sys.exit(commands[args[0]][1](args[1:], opts))