view tests/svnxml.py @ 28205:53f42c8d5f71

verify: show progress while verifying dirlogs In repos with treemanifests, the non-root-directory dirlogs often have many more total revisions than the root manifest log has. This change adds progress out to that part of 'hg verify'. Since the verification is recursive along the directory tree, we don't know how many total revisions there are at the beginning of the command, so instead we report progress in units of directories, much like we report progress for verification of files today. I'm not very happy with passing both 'storefiles' and 'progress' into the recursive calls. I tried passing in just a 'visitdir(dir)' callback, but the results did not seem better overall. I'm happy to update if anyone has better ideas.
author Martin von Zweigbergk <martinvonz@google.com>
date Thu, 11 Feb 2016 15:38:56 -0800
parents c58bdecdb800
children 812eb3b7dc43
line wrap: on
line source

# Read the output of a "svn log --xml" command on stdin, parse it and
# print a subset of attributes common to all svn versions tested by
# hg.
import xml.dom.minidom, sys

def xmltext(e):
    return ''.join(c.data for c
                   in e.childNodes
                   if c.nodeType == c.TEXT_NODE)

def parseentry(entry):
    e = {}
    e['revision'] = entry.getAttribute('revision')
    e['author'] = xmltext(entry.getElementsByTagName('author')[0])
    e['msg'] = xmltext(entry.getElementsByTagName('msg')[0])
    e['paths'] = []
    paths = entry.getElementsByTagName('paths')
    if paths:
        paths = paths[0]
        for p in paths.getElementsByTagName('path'):
            action = p.getAttribute('action')
            path = xmltext(p)
            frompath = p.getAttribute('copyfrom-path')
            fromrev = p.getAttribute('copyfrom-rev')
            e['paths'].append((path, action, frompath, fromrev))
    return e

def parselog(data):
    entries = []
    doc = xml.dom.minidom.parseString(data)
    for e in doc.getElementsByTagName('logentry'):
        entries.append(parseentry(e))
    return entries

def printentries(entries):
    fp = sys.stdout
    for e in entries:
        for k in ('revision', 'author', 'msg'):
            fp.write(('%s: %s\n' % (k, e[k])).encode('utf-8'))
        for path, action, fpath, frev in sorted(e['paths']):
            frominfo = ''
            if frev:
                frominfo = ' (from %s@%s)' % (fpath, frev)
            p = ' %s %s%s\n' % (action, path, frominfo)
            fp.write(p.encode('utf-8'))

if __name__ == '__main__':
    data = sys.stdin.read()
    entries = parselog(data)
    printentries(entries)