tests/filterpyflakes.py
author Bryan O'Sullivan <bryano@fb.com>
Fri, 01 Jun 2012 15:26:20 -0700
changeset 16943 8d08a28aa63e
parent 14209 08d84bdce1a5
child 19335 77440de177f7
permissions -rwxr-xr-x
matcher: use re2 bindings if available. There are two sets of Python re2 bindings available on the internet; this code works with both. Using re2 can greatly improve "hg status" performance when a .hgignore file becomes even modestly complex. Example: "hg status" on a clean tree with 134K files, where "hg debugignore" reports a regexp 4256 bytes in size: no .hgignore: 1.76 sec; Python re: 2.79 sec; re2: 1.82 sec. The overhead of regexp matching drops from 1.03 seconds with stock re to 0.06 seconds with re2. (For comparison, a git repo with the same contents and .gitignore file runs "git status -s" in 1.71 seconds, i.e. only slightly faster than hg with re2.)

#!/usr/bin/env python

# Filter output by pyflakes to control which warnings we check

import sys, re, os

def makekey(message):
    """Return the sort key for one line of pyflakes output.

    The input is expected to look like "path/file:line: message".  The
    first "line N" reference found in the text (if any) is remembered,
    every such reference is stripped, and the remaining fields are
    reordered as:

        text before the quoted name : text after it : quoted name :
        path : line number : remembered "line N" reference

    A message that contains no quoted name does not match the reordering
    pattern and is returned as-is (apart from the stripped reference).
    """
    lineref = ''
    found = re.search(r"(line \d+)", message)
    if found is not None:
        # keep the first reference for the tail of the key, then drop
        # all of them from the text that gets reordered
        lineref = found.group(0)
        message = re.sub(r"(line \d+)", '', message)
    template = r'\3:\5:\4:\1:\2:' + lineref
    return re.sub(r"([^:]*):([^:]+):([^']*)('[^']*')(.*)$", template, message)

# Whitelist of warnings: only pyflakes messages matching one of these
# patterns are kept, everything else is dropped.  The list and the
# combined regexp are built once, before the read loop.
pats = [
        r"imported but unused",
        r"local variable '.*' is assigned to but never used",
        r"unable to detect undefined names",
       ]
wanted = re.compile('|'.join(pats))

# Reported paths are resolved against the parent of this script's directory.
root = os.path.dirname(os.path.dirname(__file__))

lines = []
for line in sys.stdin:
    if not wanted.search(line):
        continue
    # "path/file:line: message" -> the path is everything before the first ':'
    fn = line.split(':', 1)[0]
    f = open(os.path.join(root, fn))
    # try/finally so the handle is released even when the read fails
    try:
        data = f.read()
    finally:
        f.close()
    # files carrying a 'no-check-code' marker are excluded from the report
    if 'no-check-code' in data:
        continue
    lines.append(line)

# Emit the surviving warnings in the order defined by makekey.
for line in sorted(lines, key = makekey):
    sys.stdout.write(line)
# Trailing newline.  Written explicitly because a bare "print" statement only
# produces output on Python 2; on Python 3 it is a no-op expression.
sys.stdout.write('\n')