i18n: cache the result of every gettext call
In looking at profiler output for 'hg log' on mozilla-central, I
noticed we spent a _huge_ amount of time in gettext relative to what
it's doing. Caching provides a roughly 15% performance improvement
even on repositories as small as hg.
== hg repo on linux ==
Before:
% cumulative self
time seconds seconds name
5.05 0.19 0.19 i18n.py:62:gettext
4.84 0.18 0.18 revlog.py:88:decompress
2.95 0.17 0.11 changelog.py:201:node
2.32 0.09 0.09 ui.py:577:write
2.11 0.08 0.08 i18n.py:72:gettext
2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers
1.89 0.07 0.07 obsolete.py:569:_load
1.68 0.63 0.06 localrepo.py:29:__get__
real 0m4.026s
user 0m3.993s
sys 0m0.034s
After:
% cumulative self
time seconds seconds name
8.05 0.26 0.26 revlog.py:88:decompress
2.68 0.22 0.09 color.py:395:write
2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers
1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers
1.95 0.06 0.06 ui.py:577:write
1.95 0.06 0.06 util.py:1228:datestr
1.71 0.06 0.06 utf_8.py:16:decode
1.71 0.06 0.06 revlog.py:273:__len__
real 0m3.519s
user 0m3.447s
sys 0m0.073s
== mozilla-central repo on linux ==
Before:
% cumulative self
time seconds seconds name
7.72 2.35 2.35 revlog.py:88:decompress
4.46 1.36 1.36 i18n.py:62:gettext
2.22 0.67 0.67 i18n.py:72:gettext
2.19 1.14 0.67 changelog.py:201:node
2.16 0.66 0.66 ui.py:577:write
1.96 0.60 0.60 utf_8.py:16:decode
1.93 1.97 0.59 color.py:395:write
1.85 0.81 0.56 changelog.py:136:tip
real 0m30.822s
user 0m30.660s
sys 0m0.149s
After:
% cumulative self
time seconds seconds name
9.82 2.49 2.49 revlog.py:88:decompress
2.67 1.31 0.68 localrepo.py:29:__get__
2.57 0.65 0.65 utf_8.py:16:decode
2.48 1.01 0.63 changelog.py:201:node
2.10 0.82 0.53 changelog.py:136:tip
2.01 0.51 0.51 ui.py:577:write
1.91 0.49 0.49 util.py:1232:datestr
1.85 1.65 0.47 color.py:395:write
real 0m25.619s
user 0m25.446s
sys 0m0.166s
== cpython repo on os x =
Before:
% cumulative self
time seconds seconds name
5.05 1.35 1.35 cmdutil.py:982:_show
4.59 1.22 1.22 revlog.py:274:__len__
3.98 1.06 1.06 i18n.py:62:gettext
3.91 1.04 1.04 revlog.py:1016:revision
3.68 0.98 0.98 revlog.py:337:parents
3.45 0.92 0.92 revlog.py:88:decompress
2.91 0.78 0.78 revlog.py:309:rev
2.62 0.70 0.70 revlog.py:1033:revision
real 0m30.414s
user 0m28.145s
sys 0m0.541s
After:
% cumulative self
time seconds seconds name
7.98 1.66 1.66 cmdutil.py:982:_show
6.83 1.42 1.42 changelog.py:46:decodeextra
5.18 1.08 1.08 revlog.py:274:__len__
3.94 0.82 0.82 revlog.py:1016:revision
3.41 0.71 0.71 revlog.py:309:rev
3.32 0.69 0.69 revlog.py:88:decompress
2.99 0.63 0.62 revlog.py:1033:revision
2.69 0.56 0.56 revlog.py:341:start
real 0m22.811s
user 0m21.883s
sys 0m0.397s
# parser.py - simple top-down operator precedence parser for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
# see http://effbot.org/zone/simple-top-down-parsing.htm and
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
# for background
# takes a tokenizer and elements
# tokenizer is an iterator that returns type, value pairs
# elements is a mapping of types to binding strength, prefix and infix actions
# an action is a tree node name, a tree label, and an optional match
# __call__(program) parses program into a labeled tree
import error
from i18n import _
class parser(object):
def __init__(self, tokenizer, elements, methods=None):
self._tokenizer = tokenizer
self._elements = elements
self._methods = methods
self.current = None
def _advance(self):
'advance the tokenizer'
t = self.current
try:
self.current = self._iter.next()
except StopIteration:
pass
return t
def _match(self, m, pos):
'make sure the tokenizer matches an end condition'
if self.current[0] != m:
raise error.ParseError(_("unexpected token: %s") % self.current[0],
self.current[2])
self._advance()
def _parse(self, bind=0):
token, value, pos = self._advance()
# handle prefix rules on current token
prefix = self._elements[token][1]
if not prefix:
raise error.ParseError(_("not a prefix: %s") % token, pos)
if len(prefix) == 1:
expr = (prefix[0], value)
else:
if len(prefix) > 2 and prefix[2] == self.current[0]:
self._match(prefix[2], pos)
expr = (prefix[0], None)
else:
expr = (prefix[0], self._parse(prefix[1]))
if len(prefix) > 2:
self._match(prefix[2], pos)
# gather tokens until we meet a lower binding strength
while bind < self._elements[self.current[0]][0]:
token, value, pos = self._advance()
e = self._elements[token]
# check for suffix - next token isn't a valid prefix
if len(e) == 4 and not self._elements[self.current[0]][1]:
suffix = e[3]
expr = (suffix[0], expr)
else:
# handle infix rules
if len(e) < 3 or not e[2]:
raise error.ParseError(_("not an infix: %s") % token, pos)
infix = e[2]
if len(infix) == 3 and infix[2] == self.current[0]:
self._match(infix[2], pos)
expr = (infix[0], expr, (None))
else:
expr = (infix[0], expr, self._parse(infix[1]))
if len(infix) == 3:
self._match(infix[2], pos)
return expr
def parse(self, message, lookup=None):
'generate a parse tree from a message'
if lookup:
self._iter = self._tokenizer(message, lookup)
else:
self._iter = self._tokenizer(message)
self._advance()
res = self._parse()
token, value, pos = self.current
return res, pos
def eval(self, tree):
'recursively evaluate a parse tree using node methods'
if not isinstance(tree, tuple):
return tree
return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
def __call__(self, message):
'parse a message into a parse tree and evaluate if methods given'
t = self.parse(message)
if self._methods:
return self.eval(t)
return t