Mercurial > hg
annotate i18n/posplit @ 33449:5747967e257c
phase: put retractboundary out of the loop in advanceboundary
It seems that we were calling retractboundary once for each phase to process.
Moving the retractboundary call out of the loop reduces the number of calls,
which helps with tracking phase changes.
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Mon, 10 Jul 2017 22:22:42 +0200 |
parents | 90d84e1e427a |
children | d0e8933d6dad |
rev | line source |
---|---|
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
2 # |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
3 # posplit - split messages in paragraphs on .po/.pot files |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
4 # |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
5 # license: MIT/X11/Expat |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
6 # |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
7 |
29153
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
8 from __future__ import absolute_import, print_function |
29152
c5057b7780dc
py3: make i18n/posplit use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28074
diff
changeset
|
9 |
c5057b7780dc
py3: make i18n/posplit use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28074
diff
changeset
|
10 import polib |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
11 import re |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
12 import sys |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
13 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
14 def addentry(po, entry, cache): |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
15 e = cache.get(entry.msgid) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
16 if e: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
17 e.occurrences.extend(entry.occurrences) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
18 else: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
19 po.append(entry) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
20 cache[entry.msgid] = entry |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
21 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
22 def mkentry(orig, delta, msgid, msgstr): |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
23 entry = polib.POEntry() |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
24 entry.merge(orig) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
25 entry.msgid = msgid or orig.msgid |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
26 entry.msgstr = msgstr or orig.msgstr |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
27 entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
28 return entry |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
29 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
30 if __name__ == "__main__": |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
31 po = polib.pofile(sys.argv[1]) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
32 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
33 cache = {} |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
34 entries = po[:] |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
35 po[:] = [] |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
36 findd = re.compile(r' *\.\. (\w+)::') # for finding directives |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
37 for entry in entries: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
38 msgids = entry.msgid.split(u'\n\n') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
39 if entry.msgstr: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
40 msgstrs = entry.msgstr.split(u'\n\n') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
41 else: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
42 msgstrs = [u''] * len(msgids) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
43 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
44 if len(msgids) != len(msgstrs): |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
45 # places the whole existing translation as a fuzzy |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
46 # translation for each paragraph, to give the |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
47 # translator a chance to recover part of the old |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
48 # translation - erasing extra paragraphs is |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
49 # probably better than retranslating all from start |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
50 if 'fuzzy' not in entry.flags: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
51 entry.flags.append('fuzzy') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
52 msgstrs = [entry.msgstr] * len(msgids) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
53 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
54 delta = 0 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
55 for msgid, msgstr in zip(msgids, msgstrs): |
20361
3fe079d3a2b4
i18n: posplit removes the entry "::" from the pot file
Simon Heimberg <simohe@besonet.ch>
parents:
20359
diff
changeset
|
56 if msgid and msgid != '::': |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
57 newentry = mkentry(entry, delta, msgid, msgstr) |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
58 mdirective = findd.match(msgid) |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
59 if mdirective: |
20362
1bce1078501d
i18n: leave out entries which contain only a rst directive
Simon Heimberg <simohe@besonet.ch>
parents:
20361
diff
changeset
|
60 if not msgid[mdirective.end():].rstrip(): |
1bce1078501d
i18n: leave out entries which contain only a rst directive
Simon Heimberg <simohe@besonet.ch>
parents:
20361
diff
changeset
|
61 # only directive, nothing to translate here |
28074
a1924bc6e267
i18n: calculate correct line number in source of messages to be translated
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
20363
diff
changeset
|
62 delta += 2 |
20362
1bce1078501d
i18n: leave out entries which contain only a rst directive
Simon Heimberg <simohe@besonet.ch>
parents:
20361
diff
changeset
|
63 continue |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
64 directive = mdirective.group(1) |
20363
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
65 if directive in ('container', 'include'): |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
66 if msgid.rstrip('\n').count('\n') == 0: |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
67 # only rst syntax, nothing to translate |
28074
a1924bc6e267
i18n: calculate correct line number in source of messages to be translated
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
20363
diff
changeset
|
68 delta += 2 |
20363
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
69 continue |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
70 else: |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
71 # lines following directly, unexpected |
29153
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
72 print('Warning: text follows line with directive' \ |
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
73 ' %s' % directive) |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
74 comment = 'do not translate: .. %s::' % directive |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
75 if not newentry.comment: |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
76 newentry.comment = comment |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
77 elif comment not in newentry.comment: |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
78 newentry.comment += '\n' + comment |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
79 addentry(po, newentry, cache) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
80 delta += 2 + msgid.count('\n') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
81 po.save() |