i18n/posplit
author Siddharth Agarwal <sid0@fb.com>
Sun, 10 Feb 2013 16:55:01 +0000
changeset 18651 e556659340f0
parent 11389 4fd49329a1b5
child 20359 ff6ab0b2ebf7
permissions -rwxr-xr-x
manifestmerge: fix order in which manifests are fetched If the manifest of an earlier revision on the same delta chain is read before that of a later revision, the revlog remembers that we parsed the earlier revision and continues applying deltas from there onwards. If manifests are parsed the other way round, we have to start over from the fulltext. For a fresh clone of mozilla-central, updating from 29dd80c95b7d to its parent aab96936a177 requires approximately 400 fewer zlib.decompress calls, which results in a speedup from 1.10 seconds to 1.05.

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import sys
import polib

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid:
                newentry = mkentry(entry, delta, msgid, msgstr)
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()