Mercurial > hg
annotate i18n/posplit @ 32151:4d504e541d3d
rebase: use matcher to optimize manifestmerge
The old merge code would call manifestmerge and calculate the complete diff
between the source and the destination. In many cases, like rebase, the vast
majority of differences between the source and destination are irrelevant
because they are differences between the destination and the common ancestor
only, and therefore don't affect the merge. Since most actions are 'keep', all
the effort to compute them is wasted.
Instead, let's compute the difference between the source and the common ancestor
and only perform the diff of those files against the merge destination. When
using treemanifest, this lets us avoid loading almost the entire tree when
rebasing from a very old ancestor. This speeds up rebase of an old stack of 27
commits by 20x.
In mozilla-central, without treemanifest, when rebasing a commit from
default~100000 to default, this speeds up the manifestmerge step from 2.6s to
1.2s. However, the additional diff adds an overhead to all manifestmerge calls,
especially for flat manifests. When rebasing a commit from default~1 to default
it appears to add 100ms in mozilla-central. While we could put this optimization
behind a flag, I think the fact that it makes merge O(number of changes being
applied) instead of O(number of changes between X and Y) justifies it.
author | Durham Goode <durham@fb.com> |
---|---|
date | Wed, 03 May 2017 10:43:59 -0700 |
parents | 90d84e1e427a |
children | d0e8933d6dad |
rev | line source |
---|---|
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
2 # |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
3 # posplit - split messages in paragraphs on .po/.pot files |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
4 # |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
5 # license: MIT/X11/Expat |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
6 # |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
7 |
29153
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
8 from __future__ import absolute_import, print_function |
29152
c5057b7780dc
py3: make i18n/posplit use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28074
diff
changeset
|
9 |
c5057b7780dc
py3: make i18n/posplit use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
28074
diff
changeset
|
10 import polib |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
11 import re |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
12 import sys |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
13 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
14 def addentry(po, entry, cache): |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
15 e = cache.get(entry.msgid) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
16 if e: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
17 e.occurrences.extend(entry.occurrences) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
18 else: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
19 po.append(entry) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
20 cache[entry.msgid] = entry |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
21 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
22 def mkentry(orig, delta, msgid, msgstr): |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
23 entry = polib.POEntry() |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
24 entry.merge(orig) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
25 entry.msgid = msgid or orig.msgid |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
26 entry.msgstr = msgstr or orig.msgstr |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
27 entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
28 return entry |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
29 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
30 if __name__ == "__main__": |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
31 po = polib.pofile(sys.argv[1]) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
32 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
33 cache = {} |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
34 entries = po[:] |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
35 po[:] = [] |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
36 findd = re.compile(r' *\.\. (\w+)::') # for finding directives |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
37 for entry in entries: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
38 msgids = entry.msgid.split(u'\n\n') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
39 if entry.msgstr: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
40 msgstrs = entry.msgstr.split(u'\n\n') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
41 else: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
42 msgstrs = [u''] * len(msgids) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
43 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
44 if len(msgids) != len(msgstrs): |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
45 # places the whole existing translation as a fuzzy |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
46 # translation for each paragraph, to give the |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
47 # translator a chance to recover part of the old |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
48 # translation - erasing extra paragraphs is |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
49 # probably better than retranslating all from start |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
50 if 'fuzzy' not in entry.flags: |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
51 entry.flags.append('fuzzy') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
52 msgstrs = [entry.msgstr] * len(msgids) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
53 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
54 delta = 0 |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
55 for msgid, msgstr in zip(msgids, msgstrs): |
20361
3fe079d3a2b4
i18n: posplit removes the entry "::" from the pot file
Simon Heimberg <simohe@besonet.ch>
parents:
20359
diff
changeset
|
56 if msgid and msgid != '::': |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
57 newentry = mkentry(entry, delta, msgid, msgstr) |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
58 mdirective = findd.match(msgid) |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
59 if mdirective: |
20362
1bce1078501d
i18n: leave out entries which contain only a rst directive
Simon Heimberg <simohe@besonet.ch>
parents:
20361
diff
changeset
|
60 if not msgid[mdirective.end():].rstrip(): |
1bce1078501d
i18n: leave out entries which contain only a rst directive
Simon Heimberg <simohe@besonet.ch>
parents:
20361
diff
changeset
|
61 # only directive, nothing to translate here |
28074
a1924bc6e267
i18n: calculate correct line number in source of messages to be translated
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
20363
diff
changeset
|
62 delta += 2 |
20362
1bce1078501d
i18n: leave out entries which contain only a rst directive
Simon Heimberg <simohe@besonet.ch>
parents:
20361
diff
changeset
|
63 continue |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
64 directive = mdirective.group(1) |
20363
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
65 if directive in ('container', 'include'): |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
66 if msgid.rstrip('\n').count('\n') == 0: |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
67 # only rst syntax, nothing to translate |
28074
a1924bc6e267
i18n: calculate correct line number in source of messages to be translated
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
20363
diff
changeset
|
68 delta += 2 |
20363
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
69 continue |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
70 else: |
e3ee7ec85a15
i18n: leave out entries which contain only rst syntax
Simon Heimberg <simohe@besonet.ch>
parents:
20362
diff
changeset
|
71 # lines following directly, unexpected |
29153
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
72 print('Warning: text follows line with directive' \ |
90d84e1e427a
py3: make i18n/posplit use print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents:
29152
diff
changeset
|
73 ' %s' % directive) |
20359
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
74 comment = 'do not translate: .. %s::' % directive |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
75 if not newentry.comment: |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
76 newentry.comment = comment |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
77 elif comment not in newentry.comment: |
ff6ab0b2ebf7
i18n: posplit writes a warning for translators before rst directives
Simon Heimberg <simohe@besonet.ch>
parents:
11389
diff
changeset
|
78 newentry.comment += '\n' + comment |
11389
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
79 addentry(po, newentry, cache) |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
80 delta += 2 + msgid.count('\n') |
4fd49329a1b5
i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff
changeset
|
81 po.save() |