# HG changeset patch
# User Bryan O'Sullivan
# Date 1338589580 25200
# Node ID 8d08a28aa63e6239544b705a7835b14ea6b119e4
# Parent  87882c8753d47edbd71d527e9a7e12c1f1fe0eef
matcher: use re2 bindings if available

There are two sets of Python re2 bindings available on the internet;
this code works with both.

Using re2 can greatly improve "hg status" performance when a
.hgignore file becomes even modestly complex.

Example: "hg status" on a clean tree with 134K files, where "hg
debugignore" reports a regexp 4256 bytes in size.

  no .hgignore: 1.76 sec
  Python re:    2.79
  re2:          1.82

The overhead of regexp matching drops from 1.03 seconds with stock
re to 0.06 with re2.

(For comparison, a git repo with the same contents and .gitignore
file runs "git status -s" in 1.71 seconds, i.e. only slightly faster
than hg with re2.)

diff -r 87882c8753d4 -r 8d08a28aa63e mercurial/match.py
--- a/mercurial/match.py	Thu Jun 07 01:42:50 2012 +0200
+++ b/mercurial/match.py	Fri Jun 01 15:26:20 2012 -0700
@@ -9,6 +9,14 @@
 import scmutil, util, fileset
 from i18n import _
 
+def _rematcher(pat):
+    m = util.compilere(pat)
+    try:
+        # slightly faster, provided by facebook's re2 bindings
+        return m.test_match
+    except AttributeError:
+        return m.match
+
 def _expandsets(pats, ctx):
     '''convert set: patterns into a list of files in the given context'''
     fset = set()
@@ -280,7 +288,7 @@
         pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
         if len(pat) > 20000:
             raise OverflowError
-        return pat, re.compile(pat).match
+        return pat, _rematcher(pat)
     except OverflowError:
         # We're using a Python with a tiny regex engine and we
         # made it explode, so we'll divide the pattern list in two
@@ -294,7 +302,7 @@
     except re.error:
         for k, p in pats:
             try:
-                re.compile('(?:%s)' % _regex(k, p, tail))
+                _rematcher('(?:%s)' % _regex(k, p, tail))
             except re.error:
                 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
         raise util.Abort(_("invalid pattern"))
diff -r 87882c8753d4 -r 8d08a28aa63e mercurial/util.py
--- a/mercurial/util.py	Thu Jun 07 01:42:50 2012 +0200
+++ b/mercurial/util.py	Fri Jun 01 15:26:20 2012 -0700
@@ -629,6 +629,30 @@
     except OSError:
         return True
 
+try:
+    import re2
+    _re2 = None
+except ImportError:
+    _re2 = False
+
+def compilere(pat):
+    '''Compile a regular expression, using re2 if possible
+
+    For best performance, use only re2-compatible regexp features.'''
+    global _re2
+    if _re2 is None:
+        try:
+            re2.compile
+            _re2 = True
+        except ImportError:
+            _re2 = False
+    if _re2:
+        try:
+            return re2.compile(pat)
+        except re2.error:
+            pass
+    return re.compile(pat)
+
 _fspathcache = {}
 def fspath(name, root):
     '''Get name in the case stored in the filesystem