i18n/posplit
author Christian Ebert <blacktrash@gmx.net>
Fri, 08 Oct 2010 18:39:46 +0100
changeset 12630 94926263b7ab
parent 11389 4fd49329a1b5
child 20359 ff6ab0b2ebf7
permissions -rwxr-xr-x
keyword: specific regular expressions depending on read mode. More safeguarding against accidental (un)expansion: Reading filelog: act only on \$(kw1|kw2|..)\$ as keywords are always stored unexpanded. Reading wdir: act only on \$(kw1|kw2|..): [^$\n\r]*? \$ as we are only interested in expanded keywords in this situation. Note: we cannot use ..): [^$\n\r]+? \$ because e.g. the {branch} template might be empty. hg record is a special case as we read from the working directory and need one regex each for modified and added files. Therefore test recording an added file. This way we finally also forbid sequences like $Id: $ being treated as keywords.

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import sys
import polib

def addentry(po, entry, cache):
    """Append entry to po unless its msgid has already been stored.

    cache maps each msgid to the entry first stored under it.  When the
    msgid is already cached, the new entry itself is dropped and only
    its occurrences are appended to the cached entry; otherwise the
    entry is appended to po and recorded in cache.
    """
    known = cache.get(entry.msgid)
    if not known:
        # first time this msgid shows up: keep the entry and remember it
        po.append(entry)
        cache[entry.msgid] = entry
        return
    # repeated msgid: fold the source references into the stored entry
    known.occurrences.extend(entry.occurrences)

def mkentry(orig, delta, msgid, msgstr):
    """Build a new polib.POEntry for one paragraph of orig.

    The new entry is first merged with orig (entry.merge(orig)), then
    its msgid and msgstr are replaced by the given paragraph texts; an
    empty msgid or msgstr falls back to the corresponding value of orig.

    Every (path, line) occurrence of orig is carried over with its line
    number shifted by delta.  An occurrence whose line cannot be
    converted to an integer is kept unchanged instead of raising
    ValueError, so one reference without a line number does not abort
    the whole run.
    NOTE(review): polib appears to report a missing line number as an
    empty string -- confirm against the polib version in use.
    """
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    shifted = []
    for path, line in orig.occurrences:
        try:
            line = int(line) + delta
        except (TypeError, ValueError):
            # no usable line number: nothing to shift, keep it as it is
            pass
        shifted.append((path, line))
    entry.occurrences = shifted
    return entry

if __name__ == "__main__":
    # the .po/.pot file named by the first argument is rewritten in place
    po = polib.pofile(sys.argv[1])

    # take the existing entries out of the file object, then refill it
    # with one entry per paragraph; cache maps msgid -> entry already added
    cache = {}
    originals = po[:]
    po[:] = []
    for entry in originals:
        idparts = entry.msgid.split(u'\n\n')
        # an untranslated entry yields one empty translation per paragraph
        strparts = (entry.msgstr.split(u'\n\n') if entry.msgstr
                    else [u''] * len(idparts))

        if len(strparts) != len(idparts):
            # paragraph counts disagree: mark the entry fuzzy and hand
            # the complete old translation to every paragraph, so the
            # translator can trim it down rather than having to
            # translate everything again from scratch
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            strparts = [entry.msgstr] * len(idparts)

        # line offset of the current paragraph from the start of the message
        offset = 0
        for idpart, strpart in zip(idparts, strparts):
            if idpart:
                addentry(po, mkentry(entry, offset, idpart, strpart), cache)
            # a paragraph spans count('\n') + 1 lines and is followed by
            # one blank separator line
            offset += idpart.count('\n') + 2
    po.save()