i18n: cache the result of every gettext call
In looking at profiler output for 'hg log' on mozilla-central, I
noticed we spent a _huge_ amount of time in gettext relative to what
it's doing. Caching provides a roughly 15% performance improvement
even on repositories as small as hg.
== hg repo on linux ==
Before:
% cumulative self
time seconds seconds name
5.05 0.19 0.19 i18n.py:62:gettext
4.84 0.18 0.18 revlog.py:88:decompress
2.95 0.17 0.11 changelog.py:201:node
2.32 0.09 0.09 ui.py:577:write
2.11 0.08 0.08 i18n.py:72:gettext
2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers
1.89 0.07 0.07 obsolete.py:569:_load
1.68 0.63 0.06 localrepo.py:29:__get__
real 0m4.026s
user 0m3.993s
sys 0m0.034s
After:
% cumulative self
time seconds seconds name
8.05 0.26 0.26 revlog.py:88:decompress
2.68 0.22 0.09 color.py:395:write
2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers
1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers
1.95 0.06 0.06 ui.py:577:write
1.95 0.06 0.06 util.py:1228:datestr
1.71 0.06 0.06 utf_8.py:16:decode
1.71 0.06 0.06 revlog.py:273:__len__
real 0m3.519s
user 0m3.447s
sys 0m0.073s
== mozilla-central repo on linux ==
Before:
% cumulative self
time seconds seconds name
7.72 2.35 2.35 revlog.py:88:decompress
4.46 1.36 1.36 i18n.py:62:gettext
2.22 0.67 0.67 i18n.py:72:gettext
2.19 1.14 0.67 changelog.py:201:node
2.16 0.66 0.66 ui.py:577:write
1.96 0.60 0.60 utf_8.py:16:decode
1.93 1.97 0.59 color.py:395:write
1.85 0.81 0.56 changelog.py:136:tip
real 0m30.822s
user 0m30.660s
sys 0m0.149s
After:
% cumulative self
time seconds seconds name
9.82 2.49 2.49 revlog.py:88:decompress
2.67 1.31 0.68 localrepo.py:29:__get__
2.57 0.65 0.65 utf_8.py:16:decode
2.48 1.01 0.63 changelog.py:201:node
2.10 0.82 0.53 changelog.py:136:tip
2.01 0.51 0.51 ui.py:577:write
1.91 0.49 0.49 util.py:1232:datestr
1.85 1.65 0.47 color.py:395:write
real 0m25.619s
user 0m25.446s
sys 0m0.166s
== cpython repo on os x =
Before:
% cumulative self
time seconds seconds name
5.05 1.35 1.35 cmdutil.py:982:_show
4.59 1.22 1.22 revlog.py:274:__len__
3.98 1.06 1.06 i18n.py:62:gettext
3.91 1.04 1.04 revlog.py:1016:revision
3.68 0.98 0.98 revlog.py:337:parents
3.45 0.92 0.92 revlog.py:88:decompress
2.91 0.78 0.78 revlog.py:309:rev
2.62 0.70 0.70 revlog.py:1033:revision
real 0m30.414s
user 0m28.145s
sys 0m0.541s
After:
% cumulative self
time seconds seconds name
7.98 1.66 1.66 cmdutil.py:982:_show
6.83 1.42 1.42 changelog.py:46:decodeextra
5.18 1.08 1.08 revlog.py:274:__len__
3.94 0.82 0.82 revlog.py:1016:revision
3.41 0.71 0.71 revlog.py:309:rev
3.32 0.69 0.69 revlog.py:88:decompress
2.99 0.63 0.62 revlog.py:1033:revision
2.69 0.56 0.56 revlog.py:341:start
real 0m22.811s
user 0m21.883s
sys 0m0.397s
# Perforce source for convert extension.
#
# Copyright 2009, Frank Kingswood <frank@kingswood-consulting.co.uk>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from mercurial import util
from mercurial.i18n import _
from common import commit, converter_source, checktool, NoRepo
import marshal
import re
def loaditer(f):
"Yield the dictionary objects generated by p4"
try:
while True:
d = marshal.load(f)
if not d:
break
yield d
except EOFError:
pass
class p4_source(converter_source):
def __init__(self, ui, path, rev=None):
super(p4_source, self).__init__(ui, path, rev=rev)
if "/" in path and not path.startswith('//'):
raise NoRepo(_('%s does not look like a P4 repository') % path)
checktool('p4', abort=False)
self.p4changes = {}
self.heads = {}
self.changeset = {}
self.files = {}
self.tags = {}
self.lastbranch = {}
self.parent = {}
self.encoding = "latin_1"
self.depotname = {} # mapping from local name to depot name
self.re_type = re.compile(
"([a-z]+)?(text|binary|symlink|apple|resource|unicode|utf\d+)"
"(\+\w+)?$")
self.re_keywords = re.compile(
r"\$(Id|Header|Date|DateTime|Change|File|Revision|Author)"
r":[^$\n]*\$")
self.re_keywords_old = re.compile("\$(Id|Header):[^$\n]*\$")
self._parse(ui, path)
def _parse_view(self, path):
"Read changes affecting the path"
cmd = 'p4 -G changes -s submitted %s' % util.shellquote(path)
stdout = util.popen(cmd, mode='rb')
for d in loaditer(stdout):
c = d.get("change", None)
if c:
self.p4changes[c] = True
def _parse(self, ui, path):
"Prepare list of P4 filenames and revisions to import"
ui.status(_('reading p4 views\n'))
# read client spec or view
if "/" in path:
self._parse_view(path)
if path.startswith("//") and path.endswith("/..."):
views = {path[:-3]:""}
else:
views = {"//": ""}
else:
cmd = 'p4 -G client -o %s' % util.shellquote(path)
clientspec = marshal.load(util.popen(cmd, mode='rb'))
views = {}
for client in clientspec:
if client.startswith("View"):
sview, cview = clientspec[client].split()
self._parse_view(sview)
if sview.endswith("...") and cview.endswith("..."):
sview = sview[:-3]
cview = cview[:-3]
cview = cview[2:]
cview = cview[cview.find("/") + 1:]
views[sview] = cview
# list of changes that affect our source files
self.p4changes = self.p4changes.keys()
self.p4changes.sort(key=int)
# list with depot pathnames, longest first
vieworder = views.keys()
vieworder.sort(key=len, reverse=True)
# handle revision limiting
startrev = self.ui.config('convert', 'p4.startrev', default=0)
self.p4changes = [x for x in self.p4changes
if ((not startrev or int(x) >= int(startrev)) and
(not self.rev or int(x) <= int(self.rev)))]
# now read the full changelists to get the list of file revisions
ui.status(_('collecting p4 changelists\n'))
lastid = None
for change in self.p4changes:
cmd = "p4 -G describe -s %s" % change
stdout = util.popen(cmd, mode='rb')
d = marshal.load(stdout)
desc = self.recode(d.get("desc", ""))
shortdesc = desc.split("\n", 1)[0]
t = '%s %s' % (d["change"], repr(shortdesc)[1:-1])
ui.status(util.ellipsis(t, 80) + '\n')
if lastid:
parents = [lastid]
else:
parents = []
date = (int(d["time"]), 0) # timezone not set
c = commit(author=self.recode(d["user"]),
date=util.datestr(date, '%Y-%m-%d %H:%M:%S %1%2'),
parents=parents, desc=desc, branch='',
extra={"p4": change})
files = []
i = 0
while ("depotFile%d" % i) in d and ("rev%d" % i) in d:
oldname = d["depotFile%d" % i]
filename = None
for v in vieworder:
if oldname.startswith(v):
filename = views[v] + oldname[len(v):]
break
if filename:
files.append((filename, d["rev%d" % i]))
self.depotname[filename] = oldname
i += 1
self.changeset[change] = c
self.files[change] = files
lastid = change
if lastid:
self.heads = [lastid]
def getheads(self):
return self.heads
def getfile(self, name, rev):
cmd = 'p4 -G print %s' \
% util.shellquote("%s#%s" % (self.depotname[name], rev))
stdout = util.popen(cmd, mode='rb')
mode = None
contents = ""
keywords = None
for d in loaditer(stdout):
code = d["code"]
data = d.get("data")
if code == "error":
raise IOError(d["generic"], data)
elif code == "stat":
if d.get("action") == "purge":
return None, None
p4type = self.re_type.match(d["type"])
if p4type:
mode = ""
flags = (p4type.group(1) or "") + (p4type.group(3) or "")
if "x" in flags:
mode = "x"
if p4type.group(2) == "symlink":
mode = "l"
if "ko" in flags:
keywords = self.re_keywords_old
elif "k" in flags:
keywords = self.re_keywords
elif code == "text" or code == "binary":
contents += data
if mode is None:
return None, None
if keywords:
contents = keywords.sub("$\\1$", contents)
if mode == "l" and contents.endswith("\n"):
contents = contents[:-1]
return contents, mode
def getchanges(self, rev, full):
if full:
raise util.Abort(_("convert from p4 do not support --full"))
return self.files[rev], {}
def getcommit(self, rev):
return self.changeset[rev]
def gettags(self):
return self.tags
def getchangedfiles(self, rev, i):
return sorted([x[0] for x in self.files[rev]])