i18n: cache the result of every gettext call
In looking at profiler output for 'hg log' on mozilla-central, I
noticed we spent a _huge_ amount of time in gettext relative to what
it's doing. Caching provides a roughly 15% performance improvement
even on repositories as small as hg.
== hg repo on linux ==
Before:
% cumulative self
time seconds seconds name
5.05 0.19 0.19 i18n.py:62:gettext
4.84 0.18 0.18 revlog.py:88:decompress
2.95 0.17 0.11 changelog.py:201:node
2.32 0.09 0.09 ui.py:577:write
2.11 0.08 0.08 i18n.py:72:gettext
2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers
1.89 0.07 0.07 obsolete.py:569:_load
1.68 0.63 0.06 localrepo.py:29:__get__
real 0m4.026s
user 0m3.993s
sys 0m0.034s
After:
% cumulative self
time seconds seconds name
8.05 0.26 0.26 revlog.py:88:decompress
2.68 0.22 0.09 color.py:395:write
2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers
1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers
1.95 0.06 0.06 ui.py:577:write
1.95 0.06 0.06 util.py:1228:datestr
1.71 0.06 0.06 utf_8.py:16:decode
1.71 0.06 0.06 revlog.py:273:__len__
real 0m3.519s
user 0m3.447s
sys 0m0.073s
== mozilla-central repo on linux ==
Before:
% cumulative self
time seconds seconds name
7.72 2.35 2.35 revlog.py:88:decompress
4.46 1.36 1.36 i18n.py:62:gettext
2.22 0.67 0.67 i18n.py:72:gettext
2.19 1.14 0.67 changelog.py:201:node
2.16 0.66 0.66 ui.py:577:write
1.96 0.60 0.60 utf_8.py:16:decode
1.93 1.97 0.59 color.py:395:write
1.85 0.81 0.56 changelog.py:136:tip
real 0m30.822s
user 0m30.660s
sys 0m0.149s
After:
% cumulative self
time seconds seconds name
9.82 2.49 2.49 revlog.py:88:decompress
2.67 1.31 0.68 localrepo.py:29:__get__
2.57 0.65 0.65 utf_8.py:16:decode
2.48 1.01 0.63 changelog.py:201:node
2.10 0.82 0.53 changelog.py:136:tip
2.01 0.51 0.51 ui.py:577:write
1.91 0.49 0.49 util.py:1232:datestr
1.85 1.65 0.47 color.py:395:write
real 0m25.619s
user 0m25.446s
sys 0m0.166s
== cpython repo on os x =
Before:
% cumulative self
time seconds seconds name
5.05 1.35 1.35 cmdutil.py:982:_show
4.59 1.22 1.22 revlog.py:274:__len__
3.98 1.06 1.06 i18n.py:62:gettext
3.91 1.04 1.04 revlog.py:1016:revision
3.68 0.98 0.98 revlog.py:337:parents
3.45 0.92 0.92 revlog.py:88:decompress
2.91 0.78 0.78 revlog.py:309:rev
2.62 0.70 0.70 revlog.py:1033:revision
real 0m30.414s
user 0m28.145s
sys 0m0.541s
After:
% cumulative self
time seconds seconds name
7.98 1.66 1.66 cmdutil.py:982:_show
6.83 1.42 1.42 changelog.py:46:decodeextra
5.18 1.08 1.08 revlog.py:274:__len__
3.94 0.82 0.82 revlog.py:1016:revision
3.41 0.71 0.71 revlog.py:309:rev
3.32 0.69 0.69 revlog.py:88:decompress
2.99 0.63 0.62 revlog.py:1033:revision
2.69 0.56 0.56 revlog.py:341:start
real 0m22.811s
user 0m21.883s
sys 0m0.397s
--- a/mercurial/i18n.py Sat Aug 30 17:38:14 2014 +0200
+++ b/mercurial/i18n.py Fri Oct 17 13:52:10 2014 -0400
@@ -41,6 +41,8 @@
global _ugettext
_ugettext = t.ugettext
+_msgcache = {}
+
def gettext(message):
"""Translate message.
@@ -55,24 +57,26 @@
if message is None or not _ugettext:
return message
- if type(message) is unicode:
- # goofy unicode docstrings in test
- paragraphs = message.split(u'\n\n')
- else:
- paragraphs = [p.decode("ascii") for p in message.split('\n\n')]
- # Be careful not to translate the empty string -- it holds the
- # meta data of the .po file.
- u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs])
- try:
- # encoding.tolocal cannot be used since it will first try to
- # decode the Unicode string. Calling u.decode(enc) really
- # means u.encode(sys.getdefaultencoding()).decode(enc). Since
- # the Python encoding defaults to 'ascii', this fails if the
- # translated string use non-ASCII characters.
- return u.encode(encoding.encoding, "replace")
- except LookupError:
- # An unknown encoding results in a LookupError.
- return message
+ if message not in _msgcache:
+ if type(message) is unicode:
+ # goofy unicode docstrings in test
+ paragraphs = message.split(u'\n\n')
+ else:
+ paragraphs = [p.decode("ascii") for p in message.split('\n\n')]
+ # Be careful not to translate the empty string -- it holds the
+ # meta data of the .po file.
+ u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs])
+ try:
+ # encoding.tolocal cannot be used since it will first try to
+ # decode the Unicode string. Calling u.decode(enc) really
+ # means u.encode(sys.getdefaultencoding()).decode(enc). Since
+ # the Python encoding defaults to 'ascii', this fails if the
+ # translated string use non-ASCII characters.
+ _msgcache[message] = u.encode(encoding.encoding, "replace")
+ except LookupError:
+ # An unknown encoding results in a LookupError.
+ _msgcache[message] = message
+ return _msgcache[message]
def _plain():
if 'HGPLAIN' not in os.environ and 'HGPLAINEXCEPT' not in os.environ: