matcher: use re2 bindings if available
There are two sets of Python re2 bindings available on the internet;
this code works with both.
Using re2 can greatly improve "hg status" performance when a .hgignore
file becomes even modestly complex.
Example: "hg status" on a clean tree with 134K files, where "hg
debugignore" reports a regexp 4256 bytes in size.
no .hgignore: 1.76 sec
Python re: 2.79 sec
re2: 1.82 sec
The overhead of regexp matching drops from 1.03 seconds with stock
re to 0.06 with re2.
(For comparison, a git repo with the same contents and .gitignore
file runs "git status -s" in 1.71 seconds, i.e. only slightly faster
than hg with re2.)
#!/usr/bin/env python
# Filter output by pyflakes to control which warnings we check
import sys, re, os
def makekey(message):
    """Return the sort key for one line of pyflakes output.

    Input lines look like "path/file:line: message".  Any "line N" text
    inside the message is removed first and remembered.  If the remainder
    contains a quoted name, the fields are rearranged into

        <text before the name>:<text after the name>:<quoted name>:
        <path>:<line number>:<remembered "line N">

    A message without a quoted name does not match the rearranging
    pattern and is returned as is (minus any "line N" text).
    """
    lineref_pat = r"(line \d+)"
    found = re.search(lineref_pat, message)
    if found is None:
        lineref = ''
    else:
        lineref = found.group(0)
        message = re.sub(lineref_pat, '', message)
    # The remembered "line N" text goes last in the replacement template.
    template = r'\3:\5:\4:\1:\2:' + lineref
    return re.sub(r"([^:]*):([^:]+):([^']*)('[^']*')(.*)$",
                  template,
                  message)
# Pyflakes warnings that are reported; every other kind is dropped.  The
# alternation is compiled once here instead of being rebuilt per input line.
pats = [
    r"imported but unused",
    r"local variable '.*' is assigned to but never used",
    r"unable to detect undefined names",
]
wanted = re.compile('|'.join(pats))

# Directory one level above the one holding this script; the file names
# reported on stdin are resolved against it.
root = os.path.dirname(os.path.dirname(__file__))

lines = []
for line in sys.stdin:
    if not wanted.search(line):
        continue
    # The reported file name is everything before the first colon.
    fn = line.split(':', 1)[0]
    f = open(os.path.join(root, fn))
    # try/finally so the handle is released even when read() raises.
    try:
        data = f.read()
    finally:
        f.close()
    # Files containing the 'no-check-code' marker are not reported.
    if 'no-check-code' in data:
        continue
    lines.append(line)

for line in sorted(lines, key=makekey):
    sys.stdout.write(line)
# Write the trailing newline explicitly: a bare `print` only emits it
# under Python 2 and is a silent no-op expression under Python 3.
sys.stdout.write('\n')