Mercurial > hg-stable
changeset 16943:8d08a28aa63e
matcher: use re2 bindings if available
There are two sets of Python re2 bindings available on the internet;
this code works with both.
Using re2 can greatly improve "hg status" performance when a .hgignore
file becomes even modestly complex.
Example: "hg status" on a clean tree with 134K files, where "hg
debugignore" reports a regexp 4256 bytes in size.
no .hgignore: 1.76 sec
Python re: 2.79
re2: 1.82
The overhead of regexp matching drops from 1.03 seconds with stock
re to 0.06 with re2.
(For comparison, a git repo with the same contents and .gitignore
file runs "git status -s" in 1.71 seconds, i.e. only slightly faster
than hg with re2.)
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Fri, 01 Jun 2012 15:26:20 -0700 |
parents | 87882c8753d4 |
children | 5d3d77b3c512 |
files | mercurial/match.py mercurial/util.py |
diffstat | 2 files changed, 34 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/match.py Thu Jun 07 01:42:50 2012 +0200 +++ b/mercurial/match.py Fri Jun 01 15:26:20 2012 -0700 @@ -9,6 +9,14 @@ import scmutil, util, fileset from i18n import _ +def _rematcher(pat): + m = util.compilere(pat) + try: + # slightly faster, provided by facebook's re2 bindings + return m.test_match + except AttributeError: + return m.match + def _expandsets(pats, ctx): '''convert set: patterns into a list of files in the given context''' fset = set() @@ -280,7 +288,7 @@ pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats]) if len(pat) > 20000: raise OverflowError - return pat, re.compile(pat).match + return pat, _rematcher(pat) except OverflowError: # We're using a Python with a tiny regex engine and we # made it explode, so we'll divide the pattern list in two @@ -294,7 +302,7 @@ except re.error: for k, p in pats: try: - re.compile('(?:%s)' % _regex(k, p, tail)) + _rematcher('(?:%s)' % _regex(k, p, tail)) except re.error: raise util.Abort(_("invalid pattern (%s): %s") % (k, p)) raise util.Abort(_("invalid pattern"))
--- a/mercurial/util.py Thu Jun 07 01:42:50 2012 +0200 +++ b/mercurial/util.py Fri Jun 01 15:26:20 2012 -0700 @@ -629,6 +629,30 @@ except OSError: return True +try: + import re2 + _re2 = None +except ImportError: + _re2 = False + +def compilere(pat): + '''Compile a regular expression, using re2 if possible + + For best performance, use only re2-compatible regexp features.''' + global _re2 + if _re2 is None: + try: + re2.compile + _re2 = True + except ImportError: + _re2 = False + if _re2: + try: + return re2.compile(pat) + except re2.error: + pass + return re.compile(pat) + _fspathcache = {} def fspath(name, root): '''Get name in the case stored in the filesystem