annotate i18n/posplit @ 16955:92e1c64ba0d4

parsers: add a C function to pack the dirstate. This is about 9 times faster than the Python dirstate packing code. The relatively small speedup is due to the poor locality and memory access patterns caused by traversing dicts and other boxed Python values.
author Bryan O'Sullivan <bryano@fb.com>
date Wed, 30 May 2012 12:55:33 -0700
parents 4fd49329a1b5
children ff6ab0b2ebf7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11389
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
1 #!/usr/bin/env python
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
2 #
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
3 # posplit - split messages in paragraphs on .po/.pot files
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
4 #
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
5 # license: MIT/X11/Expat
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
6 #
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
7
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
8 import sys
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
9 import polib
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
10
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
def addentry(po, entry, cache):
    """Add ``entry`` to ``po``, folding duplicates together by msgid.

    ``cache`` maps every msgid already added to the entry stored for it.
    If ``entry.msgid`` is already present in ``cache``, only the
    occurrences of ``entry`` are appended to the stored entry and ``po``
    is left unchanged.  Otherwise ``entry`` is appended to ``po`` and
    recorded in ``cache`` under its msgid.
    """
    e = cache.get(entry.msgid)
    # Test against None explicitly: a cached entry must be merged into
    # even if its type happens to evaluate as false.
    if e is not None:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
18
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
def _shiftline(line, delta):
    """Return ``line`` converted to an int and moved down by ``delta``.

    A value that cannot be converted to an integer is returned
    unchanged, so an occurrence without a usable line number is kept
    as it was instead of aborting the whole run.
    """
    try:
        return int(line) + delta
    except (TypeError, ValueError):
        return line


def mkentry(orig, delta, msgid, msgstr):
    """Build a new PO entry for one paragraph of ``orig``.

    The new entry starts as an empty ``polib.POEntry`` onto which
    ``orig`` is merged with ``POEntry.merge``.  Its msgid and msgstr
    are then set to ``msgid`` and ``msgstr``, each falling back to the
    corresponding value of ``orig`` when empty.  Every occurrence of
    ``orig`` is copied with its line number increased by ``delta``.

    Returns the new entry; ``orig.occurrences`` is not modified.
    """
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    # NOTE(review): an occurrence may carry a non-numeric line number
    # (presumably an empty string for a "#: file" reference without
    # ":line" - confirm against polib); _shiftline leaves it untouched.
    entry.occurrences = [(path, _shiftline(line, delta))
                         for (path, line) in orig.occurrences]
    return entry
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
26
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
if __name__ == "__main__":
    # Split every message of the given .po/.pot file into one entry per
    # paragraph (paragraphs are separated by a blank line) and save the
    # result back through polib.
    if len(sys.argv) < 2:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit("usage: %s FILE" % sys.argv[0])

    po = polib.pofile(sys.argv[1])

    # msgid -> entry already added to the rebuilt file (see addentry)
    cache = {}
    # Keep the original entries aside and rebuild the file from scratch.
    entries = po[:]
    po[:] = []
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        # Line offset of the current paragraph from the start of the
        # original message.
        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid:
                newentry = mkentry(entry, delta, msgid, msgstr)
                addentry(po, newentry, cache)
            # Advance past this paragraph's own lines plus the two
            # newlines of the separator; empty paragraphs are counted
            # too so that later offsets stay aligned.
            delta += 2 + msgid.count('\n')
    po.save()