author | Gregory Szorc <gregory.szorc@gmail.com> |
Fri, 06 Nov 2020 13:58:59 -0800 | |
changeset 45830 | c102b704edb5 |
parent 43691 | 47ef023d0165 |
child 48875 | 6000f5b25c9b |
permissions | -rwxr-xr-x |
45830
c102b704edb5
global: use python3 in shebangs
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43691
diff
changeset
|
1 |
#!/usr/bin/env python3 |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
2 |
# |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
3 |
# posplit - split messages in paragraphs on .po/.pot files |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
4 |
# |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
5 |
# license: MIT/X11/Expat |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
6 |
# |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
7 |
|
29153
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
8 |
from __future__ import absolute_import, print_function |
29152
c5057b7780dc
py3: make i18n/posplit use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28074
diff
changeset
|
9 |
|
c5057b7780dc
py3: make i18n/posplit use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28074
diff
changeset
|
10 |
import polib |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
11 |
import re |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
12 |
import sys |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
13 |
|
43691
47ef023d0165
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
41759
diff
changeset
|
14 |
|
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
15 |
def addentry(po, entry, cache):
    """Append ``entry`` to ``po``, or merge it into an already-added entry.

    ``cache`` maps a msgid to the entry previously appended to ``po`` for
    that msgid. When the msgid of ``entry`` is not cached yet, ``entry`` is
    appended to ``po`` and recorded in ``cache``. Otherwise nothing is
    appended: the occurrences of ``entry`` are added to the cached entry and
    its comment lines are merged in.

    Comment merging works on whole lines: every non-empty line of
    ``entry.comment`` that is not already a line of the cached entry's
    comment is appended, separated by a newline.
    """
    e = cache.get(entry.msgid)
    if e is None:
        po.append(entry)
        cache[entry.msgid] = entry
        return

    e.occurrences.extend(entry.occurrences)

    # merge comments from entry
    #
    # Compare complete lines rather than testing for a substring of the
    # joined comment text; a substring test would silently drop a new
    # comment that merely happens to be contained in an existing one.
    existing = e.comment.split('\n') if e.comment else []
    additions = []
    for comment in entry.comment.split('\n'):
        if comment and comment not in existing and comment not in additions:
            additions.append(comment)
    if additions:
        e.comment = '\n'.join(existing + additions)
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
30 |
|
43691
47ef023d0165
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
41759
diff
changeset
|
31 |
|
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
32 |
def mkentry(orig, delta, msgid, msgstr):
    """Create a new ``polib.POEntry`` for one piece of ``orig``.

    The new entry is first merged with ``orig`` (see ``POEntry.merge`` in
    polib for what exactly is carried over), then gets:

    - ``msgid`` / ``msgstr``: the given values, falling back to the ones of
      ``orig`` when the given value is empty;
    - ``occurrences``: those of ``orig`` with ``delta`` added to every line
      number (the line number is converted with ``int()`` first).

    Returns the new entry; ``orig`` is only read.
    """
    piece = polib.POEntry()
    piece.merge(orig)

    piece.msgid = msgid if msgid else orig.msgid
    piece.msgstr = msgstr if msgstr else orig.msgstr

    # shift every source reference by the offset of this piece
    shifted = []
    for path, lineno in orig.occurrences:
        shifted.append((path, int(lineno) + delta))
    piece.occurrences = shifted

    return piece
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
39 |
|
43691
47ef023d0165
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
41759
diff
changeset
|
40 |
|
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
41 |
if __name__ == "__main__":
    # Split every message of the catalog given as first command line
    # argument into paragraphs (separated by an empty line), then save the
    # catalog in place.
    po = polib.pofile(sys.argv[1])

    findd = re.compile(r' *\.\. (\w+)::')  # for finding directives
    cache = {}

    # take a copy of the entries and empty the catalog; it is refilled
    # below through addentry()
    entries = po[:]
    po[:] = []

    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        msgstrs = (
            entry.msgstr.split(u'\n\n')
            if entry.msgstr
            else [u''] * len(msgids)
        )

        if len(msgstrs) != len(msgids):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        # offset added by mkentry() to the line numbers of the occurrences
        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            # newlines inside the paragraph, plus 2
            consumed = 2 + msgid.count('\n')

            if not msgid or msgid == '::':
                # empty paragraph or bare "::": no entry is created
                delta += consumed
                continue

            newentry = mkentry(entry, delta, msgid, msgstr)
            mdirective = findd.match(msgid)
            if mdirective is not None:
                if not msgid[mdirective.end() :].rstrip():
                    # only directive, nothing to translate here
                    delta += 2
                    continue

                directive = mdirective.group(1)
                if directive in ('container', 'include'):
                    if '\n' not in msgid.rstrip('\n'):
                        # only rst syntax, nothing to translate
                        delta += 2
                        continue
                    # lines following directly, unexpected
                    print(
                        'Warning: text follows line with directive %s'
                        % directive
                    )

                # tell the translator to leave the directive untouched
                comment = 'do not translate: .. %s::' % directive
                if not newentry.comment:
                    newentry.comment = comment
                elif comment not in newentry.comment:
                    newentry.comment += '\n' + comment

            addentry(po, newentry, cache)
            delta += consumed

    po.save()