Mercurial > hg
changeset 11389:4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
With fixes and heavy refactoring by Martin Geisler.
author | Wagner Bruna <wbruna@yahoo.com> |
---|---|
date | Thu, 17 Jun 2010 20:10:34 -0300 |
parents | db957a72fbd7 |
children | 11cd65611f3f |
files | i18n/posplit |
diffstat | 1 files changed, 56 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import sys

try:
    import polib
except ImportError:
    # Reported by main(); keeps the pure helpers below importable (and
    # testable) on machines where polib is not installed.
    polib = None

# Paragraphs inside a message are separated by one blank line.
PARAGRAPH_SEP = u'\n\n'


def addentry(po, entry, cache):
    """Append entry to po, or fold it into an already-seen entry.

    cache maps msgid -> the entry already appended to po for that msgid.
    If entry.msgid has been seen before, only the occurrences of entry
    are kept: they are appended to the cached entry and entry itself is
    dropped. Otherwise entry is appended to po and recorded in cache.
    """
    # NOTE(review): entries are keyed by msgid only, so two entries that
    # differ only by message context would be folded together - confirm
    # that the catalogs processed here never rely on msgctxt.
    known = cache.get(entry.msgid)
    if known is not None:
        known.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry


def shiftoccurrences(occurrences, delta):
    """Return a copy of occurrences with every line number moved by delta.

    occurrences is a list of (path, lineno) pairs. Numeric line numbers
    (ints or digit strings) come back as ints. A line number that cannot
    be converted (e.g. the empty string used for a reference that has no
    line number) is returned unchanged instead of aborting the run.
    """
    shifted = []
    for path, lineno in occurrences:
        try:
            lineno = int(lineno) + delta
        except (TypeError, ValueError):
            pass
        shifted.append((path, lineno))
    return shifted


def mkentry(orig, delta, msgid, msgstr):
    """Build a new polib.POEntry for one paragraph of orig.

    The new entry is seeded from orig through POEntry.merge() (see the
    polib documentation for exactly which fields that copies). An empty
    msgid or msgstr falls back to the corresponding value of orig, and
    the occurrences of orig are shifted down by delta lines.
    """
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = shiftoccurrences(orig.occurrences, delta)
    return entry


def pairparagraphs(msgid, msgstr):
    """Split a message into per-paragraph (delta, msgid, msgstr) triples.

    Returns (pairs, mismatched):

    - pairs lists, for every non-empty paragraph of msgid, its line
      offset inside the original message, the paragraph itself and the
      translation to attach to it;
    - mismatched is True when msgstr is non-empty but has a different
      number of paragraphs than msgid.

    An empty msgstr yields an empty translation for every paragraph.
    """
    msgids = msgid.split(PARAGRAPH_SEP)
    if msgstr:
        msgstrs = msgstr.split(PARAGRAPH_SEP)
    else:
        msgstrs = [u''] * len(msgids)

    mismatched = len(msgids) != len(msgstrs)
    if mismatched:
        # places the whole existing translation as the translation of
        # each paragraph, to give the translator a chance to recover
        # part of the old translation - erasing extra paragraphs is
        # probably better than retranslating all from start
        msgstrs = [msgstr] * len(msgids)

    pairs = []
    delta = 0
    for idpart, strpart in zip(msgids, msgstrs):
        if idpart:
            pairs.append((delta, idpart, strpart))
        # the next paragraph starts after the lines of this one plus
        # the blank separator line
        delta += 2 + idpart.count('\n')
    return pairs, mismatched


def main(argv):
    """Split every message of the catalog named by argv[1], in place.

    Returns a process exit status: 0 on success, 2 when the file
    argument is missing, 1 when polib is not installed. Arguments after
    the first one are ignored.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'posplit'
        sys.stderr.write('usage: %s FILE\n' % prog)
        return 2
    if polib is None:
        sys.stderr.write('posplit: the polib module is required\n')
        return 1

    po = polib.pofile(argv[1])

    cache = {}
    # empty the catalog and refill it with the per-paragraph entries
    entries = po[:]
    po[:] = []
    for entry in entries:
        pairs, mismatched = pairparagraphs(entry.msgid, entry.msgstr)
        # flag before building the new entries so that they inherit it
        if mismatched and 'fuzzy' not in entry.flags:
            entry.flags.append('fuzzy')
        for delta, msgid, msgstr in pairs:
            addentry(po, mkentry(entry, delta, msgid, msgstr), cache)
    po.save()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))