i18n/posplit
author Martin von Zweigbergk <martinvonz@google.com>
Fri, 04 Dec 2015 16:45:06 -0800
changeset 27249 0e5aab543d85
parent 20363 e3ee7ec85a15
child 28074 a1924bc6e267
permissions -rwxr-xr-x
revlog: clarify which revision is added to 'tested' when using cached delta. The tested delta revisions are added to the 'tested' set. These are the same revisions we pass to builddelta(). However, in one case, we add builddelta(rev)[3] to the set instead of adding 'rev' itself. In that particular case, that element is the same as the function's input revision (because self._generaldelta is true), so the effect is the same. Still, let's just add the function's input revision to avoid confusing future readers.

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import re
import sys
import polib

def addentry(po, entry, cache):
    """Add ``entry`` to ``po`` unless an entry with the same msgid exists.

    ``cache`` maps a msgid to the entry that was first added for it.  When
    the msgid has been seen before, only the occurrences of ``entry`` are
    appended to the already-registered entry; ``po`` is left unchanged.
    Otherwise ``entry`` is appended to ``po`` and recorded in ``cache``.
    """
    known = cache.get(entry.msgid)
    if not known:
        # first time this msgid shows up: register the entry itself
        po.append(entry)
        cache[entry.msgid] = entry
        return
    # duplicate msgid: keep the first entry, just remember the extra places
    known.occurrences.extend(entry.occurrences)

def mkentry(orig, delta, msgid, msgstr):
    """Build a new POEntry for one paragraph of ``orig``.

    The new entry is first merged with ``orig`` (``POEntry.merge``), then
    its msgid/msgstr are replaced by the given ``msgid``/``msgstr``; an
    empty value falls back to the corresponding field of ``orig``.  Every
    occurrence of ``orig`` is copied with its line number converted to an
    int and shifted by ``delta``.
    """
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid if msgid else orig.msgid
    entry.msgstr = msgstr if msgstr else orig.msgstr
    shifted = []
    for path, lineno in orig.occurrences:
        shifted.append((path, int(lineno) + delta))
    entry.occurrences = shifted
    return entry

if __name__ == "__main__":
    # Usage: posplit FILE
    #
    # Loads the catalog FILE, splits every message into its paragraphs
    # (chunks separated by an empty line), turns each paragraph into an
    # entry of its own and saves the catalog again via po.save().
    if len(sys.argv) < 2:
        sys.stderr.write('usage: posplit FILE\n')
        sys.exit(1)
    po = polib.pofile(sys.argv[1])

    # msgid -> entry already placed in po; lets addentry() fold paragraphs
    # with identical text into a single entry
    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::') # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            # 'offset' is the line shift of this paragraph; 'delta' is
            # advanced right away (lines of the paragraph plus the blank
            # separator line) so that none of the skip paths below can
            # leave the following paragraphs with a stale line number
            offset = delta
            delta += 2 + msgid.count('\n')
            if not msgid or msgid == '::':
                # empty paragraph or bare literal-block marker
                continue
            newentry = mkentry(entry, offset, msgid, msgstr)
            mdirective = findd.match(msgid)
            if mdirective:
                if not msgid[mdirective.end():].rstrip():
                    # only directive, nothing to translate here
                    continue
                directive = mdirective.group(1)
                if directive in ('container', 'include'):
                    if msgid.rstrip('\n').count('\n') == 0:
                        # only rst syntax, nothing to translate
                        continue
                    else:
                        # lines following directly, unexpected
                        # (single-argument print() behaves the same on
                        # Python 2 and Python 3)
                        print('Warning: text follows line with directive'
                              ' %s' % directive)
                # tell the translator to leave the directive itself alone
                comment = 'do not translate: .. %s::' % directive
                if not newentry.comment:
                    newentry.comment = comment
                elif comment not in newentry.comment:
                    newentry.comment += '\n' + comment
            addentry(po, newentry, cache)
    po.save()