i18n/posplit
author Patrick Mezard <pmezard@gmail.com>
Fri, 27 May 2011 21:50:09 +0200
changeset 14451 c78d41db6f88
parent 11389 4fd49329a1b5
child 20359 ff6ab0b2ebf7
permissions -rwxr-xr-x
patch: refactor file creation/removal detection The patcher has to know if a file is being created or removed to check if the target already exists, or to actually unlink the file when a hunk emptying it is applied. This was done by embedding the creation/removal information in the first (and only) hunk attached to the file. There are two problems with this approach: - creation/removal is really a property of the file being patched and not its hunk. - for regular patches, file creation cannot be deduced at parsing time: there are case where the *stripped* file paths must be compared. Modifying hunks after their creation is clumsy and prevent further refactorings related to copies handling. Instead, we delegate this job to selectfile() which has all the relevant information, and remove the hunk createfile() and rmfile() methods.

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import sys
import polib

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid:
                newentry = mkentry(entry, delta, msgid, msgstr)
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()