view i18n/posplit @ 22860:1dd178277cf5

revset-_descendant: rework the whole sorting and combining logic We use the & operator to combine with subset (since this is more likely to be optimised than filter) and we enforce the sorting of the result. Without this enforced sorting, we may result in a different iteration order than the set _descendent was computed from. This reverts a bad `test-glog.t` change from 69402eb72115. Another side effect is that `test-mq.t` shows `qparent::` including `-1` if `qparent is -1`. This sound like a positive change. This has good and bad impacts on the benchmarks, here is a good ones: revset: 0:: before) wall 0.045489 comb 0.040000 user 0.040000 sys 0.000000 (best of 100) after) wall 0.034330 comb 0.030000 user 0.030000 sys 0.000000 (best of 100) revset: roots((0::) - (0::tip)) before) wall 0.134090 comb 0.140000 user 0.140000 sys 0.000000 (best of 63) after) wall 0.128346 comb 0.130000 user 0.130000 sys 0.000000 (best of 69) revset: ::p1(p1(tip)):: before) wall 0.143892 comb 0.140000 user 0.140000 sys 0.000000 (best of 55) after) wall 0.124502 comb 0.130000 user 0.130000 sys 0.000000 (best of 65) revset: roots((0:tip)::) before) wall 0.204966 comb 0.200000 user 0.200000 sys 0.000000 (best of 43) after) wall 0.184455 comb 0.180000 user 0.180000 sys 0.000000 (best of 47) Here is a bad one: revset: (20000::) - (20000) before) wall 0.009592 comb 0.010000 user 0.010000 sys 0.000000 (best of 222) after) wall 0.029837 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
author Pierre-Yves David <pierre-yves.david@fb.com>
date Thu, 09 Oct 2014 09:12:54 -0700
parents e3ee7ec85a15
children a1924bc6e267
line wrap: on
line source

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import re
import sys
import polib

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::') # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end():].rstrip():
                        # only directive, nothing to translate here
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            continue
                        else:
                            # lines following directly, unexpected
                            print 'Warning: text follows line with directive' \
                                  ' %s' % directive
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()