view tests/svnxml.py @ 26945:8a256cee72c8 stable

tags: create new sortdict for performance reasons sortdict internally maintains a list of keys in insertion order. When a key is replaced via __setitem__, we .remove() from this list. This involves a linear scan and array adjustment. This is an expensive operation. The tags reading code was calling into sortdict.__setitem__ for each tag in a read .hgtags revision. For repositories with thousands of tags or thousands of .hgtags revisions, the overhead from list.remove() noticeable. This patch creates a new sortdict() so __setitem__ calls don't incur a list.remove. This doesn't appear to have any performance impact on my Firefox repository. But that's only because tags reading doesn't show up in profiles to begin with. I'm still waiting to hear from a user with over 10,000 tags and hundreds of heads on the impact of this patch.
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 12 Nov 2015 13:16:04 -0800
parents c58bdecdb800
children 812eb3b7dc43
line wrap: on
line source

# Read the output of a "svn log --xml" command on stdin, parse it and
# print a subset of attributes common to all svn versions tested by
# hg.
import xml.dom.minidom, sys

def xmltext(e):
    return ''.join(c.data for c
                   in e.childNodes
                   if c.nodeType == c.TEXT_NODE)

def parseentry(entry):
    e = {}
    e['revision'] = entry.getAttribute('revision')
    e['author'] = xmltext(entry.getElementsByTagName('author')[0])
    e['msg'] = xmltext(entry.getElementsByTagName('msg')[0])
    e['paths'] = []
    paths = entry.getElementsByTagName('paths')
    if paths:
        paths = paths[0]
        for p in paths.getElementsByTagName('path'):
            action = p.getAttribute('action')
            path = xmltext(p)
            frompath = p.getAttribute('copyfrom-path')
            fromrev = p.getAttribute('copyfrom-rev')
            e['paths'].append((path, action, frompath, fromrev))
    return e

def parselog(data):
    entries = []
    doc = xml.dom.minidom.parseString(data)
    for e in doc.getElementsByTagName('logentry'):
        entries.append(parseentry(e))
    return entries

def printentries(entries):
    fp = sys.stdout
    for e in entries:
        for k in ('revision', 'author', 'msg'):
            fp.write(('%s: %s\n' % (k, e[k])).encode('utf-8'))
        for path, action, fpath, frev in sorted(e['paths']):
            frominfo = ''
            if frev:
                frominfo = ' (from %s@%s)' % (fpath, frev)
            p = ' %s %s%s\n' % (action, path, frominfo)
            fp.write(p.encode('utf-8'))

if __name__ == '__main__':
    data = sys.stdin.read()
    entries = parselog(data)
    printentries(entries)