i18n: script for splitting large messages on .po/.pot files
With fixes and heavy refactoring by Martin Geisler.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/posplit Thu Jun 17 20:10:34 2010 -0300
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#
+# posplit - split messages into paragraphs in .po/.pot files
+#
+# license: MIT/X11/Expat
+#
+
+import sys
+import polib
+
+def addentry(po, entry, cache):
+    """Append entry to po, folding it into an earlier entry with the same msgid.
+
+    cache maps every msgid already appended to po to its entry and is
+    updated in place.  When entry.msgid is already cached, entry itself
+    is not appended; its occurrences are added to the cached entry
+    instead.  If the cached entry has no translation yet while the
+    duplicate carries one, the duplicate's msgstr (and its 'fuzzy'
+    flag, if set) is adopted so that an existing translation is not
+    silently dropped.  When both are translated, the first one wins.
+    """
+    known = cache.get(entry.msgid)
+    if known is None:
+        po.append(entry)
+        cache[entry.msgid] = entry
+        return
+
+    known.occurrences.extend(entry.occurrences)
+    if entry.msgstr and not known.msgstr:
+        known.msgstr = entry.msgstr
+        if 'fuzzy' in entry.flags and 'fuzzy' not in known.flags:
+            known.flags.append('fuzzy')
+
+def mkentry(orig, delta, msgid, msgstr):
+    """Build a new entry for one piece of the entry orig.
+
+    The new entry starts as an empty polib.POEntry merged with orig.
+    Its msgid and msgstr are then set to the given values; an empty
+    value falls back to the corresponding text of orig.  delta is added
+    to the line number of every occurrence of orig, and the result is
+    stored as a new list on the returned entry.
+    """
+    entry = polib.POEntry()
+    entry.merge(orig)
+    entry.msgid = msgid or orig.msgid
+    entry.msgstr = msgstr or orig.msgstr
+
+    shifted = []
+    for path, line in orig.occurrences:
+        # NOTE(review): polib appears to store a reference that has no
+        # line number with an empty line field - confirm.  int('')
+        # raises ValueError, so such references are copied unchanged.
+        if line not in ('', None):
+            line = int(line) + delta
+        shifted.append((path, line))
+    entry.occurrences = shifted
+    return entry
+
+if __name__ == "__main__":
+    # Rewrite the .po/.pot file named on the command line in place so
+    # that every paragraph (text separated by a blank line) of a
+    # message becomes an entry of its own.
+    if len(sys.argv) < 2:
+        # Without this check a missing argument ends in a bare
+        # IndexError traceback from sys.argv[1].
+        sys.exit("usage: posplit FILE")
+
+    po = polib.pofile(sys.argv[1])
+
+    # msgid -> entry already put back into po; lets a paragraph that
+    # occurs in several messages be emitted only once (see addentry).
+    cache = {}
+    entries = po[:]
+    po[:] = []
+    for entry in entries:
+        msgids = entry.msgid.split(u'\n\n')
+        if entry.msgstr:
+            msgstrs = entry.msgstr.split(u'\n\n')
+        else:
+            # untranslated: pair every paragraph with an empty msgstr
+            msgstrs = [u''] * len(msgids)
+
+        if len(msgids) != len(msgstrs):
+            # The translation cannot be matched to the original
+            # paragraph by paragraph.  Flag the original entry as fuzzy
+            # (before the new entries are derived from it) and offer
+            # the whole existing translation for every paragraph:
+            # erasing the extra paragraphs is probably less work for
+            # the translator than retranslating everything from start.
+            if 'fuzzy' not in entry.flags:
+                entry.flags.append('fuzzy')
+            msgstrs = [entry.msgstr] * len(msgids)
+
+        # delta: number of lines between the start of the message and
+        # the start of the current paragraph.
+        delta = 0
+        for msgid, msgstr in zip(msgids, msgstrs):
+            if msgid:
+                newentry = mkentry(entry, delta, msgid, msgstr)
+                addentry(po, newentry, cache)
+            # skip the paragraph's own lines plus the blank separator
+            delta += 2 + msgid.count('\n')
+    po.save()