tests/svnxml.py
author Mike Edgar <adgar@google.com>
Wed, 14 Jan 2015 15:16:08 -0500
changeset 24255 4bfe9f2d9761
parent 16512 c58bdecdb800
child 28947 812eb3b7dc43
permissions -rw-r--r--
revlog: addgroup checks if incoming deltas add censored revs, sets flag bit A censored revision stored in a revlog should have the censored revlog index flag bit set. This implies we must know if a revision is censored before we add it to the revlog. When adding revisions from exchanged deltas, we would prefer to determine this flag without decoding every single full text. This change introduces a heuristic based on assumptions around the Mercurial delta format and filelog metadata. Since deltas which produce a censored revision must be full-replacement deltas, we can read the delta's first bytes to check the filelog metadata. Since "censored" is the alphabetically first filelog metadata key, censored filelog revisions have a well-known prefix we can look for. For more on the design and background of the censorship feature, see: http://mercurial.selenic.com/wiki/CensorPlan

# Read the output of a "svn log --xml" command on stdin, parse it and
# print a subset of attributes common to all svn versions tested by
# hg.
import xml.dom.minidom, sys

def xmltext(e):
    return ''.join(c.data for c
                   in e.childNodes
                   if c.nodeType == c.TEXT_NODE)

def parseentry(entry):
    e = {}
    e['revision'] = entry.getAttribute('revision')
    e['author'] = xmltext(entry.getElementsByTagName('author')[0])
    e['msg'] = xmltext(entry.getElementsByTagName('msg')[0])
    e['paths'] = []
    paths = entry.getElementsByTagName('paths')
    if paths:
        paths = paths[0]
        for p in paths.getElementsByTagName('path'):
            action = p.getAttribute('action')
            path = xmltext(p)
            frompath = p.getAttribute('copyfrom-path')
            fromrev = p.getAttribute('copyfrom-rev')
            e['paths'].append((path, action, frompath, fromrev))
    return e

def parselog(data):
    entries = []
    doc = xml.dom.minidom.parseString(data)
    for e in doc.getElementsByTagName('logentry'):
        entries.append(parseentry(e))
    return entries

def printentries(entries):
    fp = sys.stdout
    for e in entries:
        for k in ('revision', 'author', 'msg'):
            fp.write(('%s: %s\n' % (k, e[k])).encode('utf-8'))
        for path, action, fpath, frev in sorted(e['paths']):
            frominfo = ''
            if frev:
                frominfo = ' (from %s@%s)' % (fpath, frev)
            p = ' %s %s%s\n' % (action, path, frominfo)
            fp.write(p.encode('utf-8'))

if __name__ == '__main__':
    data = sys.stdin.read()
    entries = parselog(data)
    printentries(entries)