i18n: script for splitting large messages on .po/.pot files
author Wagner Bruna <wbruna@yahoo.com>
Thu, 17 Jun 2010 20:10:34 -0300
changeset 11389 4fd49329a1b5
parent 11388 db957a72fbd7
child 11390 11cd65611f3f
i18n: script for splitting large messages on .po/.pot files. With fixes and heavy refactoring by Martin Geisler.
i18n/posplit
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/posplit	Thu Jun 17 20:10:34 2010 -0300
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#
+# posplit - split messages in paragraphs on .po/.pot files
+#
+# license: MIT/X11/Expat
+#
+
+import sys
+import polib
+
+def addentry(po, entry, cache):
+    """Add entry to po, folding it into an already-seen duplicate.
+
+    cache maps a msgid to the entry that was appended to po for it.
+    When cache holds a (truthy) entry for entry.msgid, only the
+    occurrences of entry are added to that cached entry.  Otherwise
+    entry is appended to po and recorded in cache under its msgid.
+    """
+    known = cache.get(entry.msgid)
+    if not known:
+        # first time this msgid shows up: store and remember it
+        po.append(entry)
+        cache[entry.msgid] = entry
+        return
+    # duplicate msgid: keep the first entry, just collect the references
+    known.occurrences.extend(entry.occurrences)
+
+def mkentry(orig, delta, msgid, msgstr):
+    """Build a new entry for one paragraph of the entry orig.
+
+    The new entry starts as a merge of orig (POEntry.merge) and then
+    receives its own msgid and msgstr; an empty msgid or msgstr falls
+    back to the corresponding value of orig.  Every occurrence of orig
+    is kept, with its line number moved forward by delta lines.
+
+    An occurrence whose line number cannot be converted to an integer
+    is copied unchanged instead of raising.
+
+    Returns the new polib.POEntry.
+    """
+    entry = polib.POEntry()
+    entry.merge(orig)
+    entry.msgid = msgid or orig.msgid
+    entry.msgstr = msgstr or orig.msgstr
+
+    occurrences = []
+    for path, lineno in orig.occurrences:
+        try:
+            lineno = int(lineno) + delta
+        except (TypeError, ValueError):
+            # no usable line number on this reference (presumably an
+            # empty string when the "#:" comment has no ":line" part -
+            # confirm against the polib version in use); keep it as is
+            # rather than aborting the whole run
+            pass
+        occurrences.append((path, lineno))
+    entry.occurrences = occurrences
+    return entry
+
+if __name__ == "__main__":
+    # usage: posplit FILE
+    #
+    # Splits every message of FILE into one entry per paragraph
+    # (paragraphs are separated by a blank line) and stores the result
+    # with po.save().
+    if len(sys.argv) < 2:
+        # exit with a hint rather than an IndexError traceback
+        sys.exit("usage: posplit FILE")
+
+    po = polib.pofile(sys.argv[1])
+
+    # msgid -> entry already stored in po; lets addentry fold a
+    # paragraph shared by several messages into a single entry
+    cache = {}
+    # work from a copy of the entries and refill the emptied file object
+    entries = po[:]
+    po[:] = []
+    for entry in entries:
+        msgids = entry.msgid.split(u'\n\n')
+        if entry.msgstr:
+            msgstrs = entry.msgstr.split(u'\n\n')
+        else:
+            # untranslated message: every paragraph is untranslated too
+            msgstrs = [u''] * len(msgids)
+
+        if len(msgids) != len(msgstrs):
+            # places the whole existing translation as a fuzzy
+            # translation for each paragraph, to give the
+            # translator a chance to recover part of the old
+            # translation - erasing extra paragraphs is
+            # probably better than retranslating all from start
+            if 'fuzzy' not in entry.flags:
+                entry.flags.append('fuzzy')
+            msgstrs = [entry.msgstr] * len(msgids)
+
+        # line offset of the current paragraph inside the original message
+        delta = 0
+        for msgid, msgstr in zip(msgids, msgstrs):
+            if msgid:
+                newentry = mkentry(entry, delta, msgid, msgstr)
+                addentry(po, newentry, cache)
+            # a paragraph spans count('\n') + 1 lines and is followed
+            # by one blank separator line
+            delta += 2 + msgid.count('\n')
+    po.save()