changelog: lazily parse description
Before, the description field was converted to a localstr at parse
time. With this patch, we store the raw description and convert to
a localstr when it is first accessed.
We see a revset speedup for revsets that don't access the description:
author(mpm)
0.896565
0.914234
0.869085
date(2015)
0.878797
0.891980
0.862525
extra(rebase_source)
0.865446
0.912514
0.871500
author(mpm) or author(greg)
1.801832
1.860402
1.791589
date(2015) or branch(default)
0.968276
0.994673
0.974027
author(mpm) or desc(bug) or date(2015) or extra(rebase_source)
3.656193
3.721032
3.643593
As you can see, most of these revsets are already faster than they
were before this refactoring: we have already offset the performance
loss from the introduction of the new class representing parsed
changelog entries!
# Read the output of a "svn log --xml" command on stdin, parse it and
# print a subset of attributes common to all svn versions tested by
# hg.
import xml.dom.minidom, sys
def xmltext(e):
    """Return the concatenated text of e's direct text-node children.

    Only immediate children whose nodeType is TEXT_NODE contribute;
    text nested inside child elements is not included.
    """
    pieces = []
    for child in e.childNodes:
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return ''.join(pieces)
def parseentry(entry):
    """Convert one <logentry> DOM element into a plain dict.

    The returned dict has the keys:

    - 'revision': value of the element's "revision" attribute
    - 'author':   text of the first <author> descendant, '' if absent
    - 'msg':      text of the first <msg> descendant, '' if absent
    - 'paths':    list of (path, action, copyfrom-path, copyfrom-rev)
                  tuples, one per <path> element found under the first
                  <paths> element; empty when there is no <paths>
    """
    def firsttext(tag):
        # An entry is not guaranteed to carry every child element (for
        # example a revision recorded without an author), so fall back
        # to an empty string instead of failing with IndexError.
        nodes = entry.getElementsByTagName(tag)
        if nodes:
            return xmltext(nodes[0])
        return ''

    e = {}
    e['revision'] = entry.getAttribute('revision')
    e['author'] = firsttext('author')
    e['msg'] = firsttext('msg')
    e['paths'] = []
    paths = entry.getElementsByTagName('paths')
    if paths:
        # Only the first <paths> container is inspected.
        for p in paths[0].getElementsByTagName('path'):
            action = p.getAttribute('action')
            path = xmltext(p)
            frompath = p.getAttribute('copyfrom-path')
            fromrev = p.getAttribute('copyfrom-rev')
            e['paths'].append((path, action, frompath, fromrev))
    return e
def parselog(data):
    """Parse an XML log document and return a list of entry dicts.

    data is the XML text; each <logentry> element found in it is
    converted with parseentry(), in the order the elements are returned
    by getElementsByTagName().
    """
    dom = xml.dom.minidom.parseString(data)
    return [parseentry(node) for node in dom.getElementsByTagName('logentry')]
def printentries(entries):
    """Write the parsed log entries to standard output as UTF-8 bytes.

    For each entry the 'revision', 'author' and 'msg' values are written
    one per line as "key: value".  They are followed by one line per
    changed path, in sorted order, of the form " ACTION PATH", with
    " (from SRCPATH@SRCREV)" appended when a copy-source revision is
    recorded for that path.
    """
    # The text is encoded explicitly, so it has to go to a byte stream.
    # On Python 3 sys.stdout is a text stream that rejects bytes; its
    # underlying binary stream is available as sys.stdout.buffer.  When
    # that attribute does not exist (Python 2), sys.stdout itself accepts
    # the encoded byte strings.
    fp = getattr(sys.stdout, 'buffer', sys.stdout)
    for e in entries:
        for k in ('revision', 'author', 'msg'):
            fp.write(('%s: %s\n' % (k, e[k])).encode('utf-8'))
        for path, action, fpath, frev in sorted(e['paths']):
            frominfo = ''
            if frev:
                frominfo = ' (from %s@%s)' % (fpath, frev)
            p = ' %s %s%s\n' % (action, path, frominfo)
            fp.write(p.encode('utf-8'))
if __name__ == '__main__':
    # Script entry point: read the whole XML log from stdin, parse it,
    # and print the selected attributes of every entry.
    printentries(parselog(sys.stdin.read()))