view i18n/posplit @ 39366:a41497b5117c

copies: improve logic of deciding copytracing on based of config options Few months ago or maybe a year ago, I imported Fb's heuristics based copytracing algorithms. While importing that, I renamed `experimental.disablecopytrace` with `experimental.copytrace` and the behavior of the new config option was like this: * "heuristics" : Fb's heuristic copytracing algorithm * "off" : copytracing is turned off * something else: copytracing is on This is the behavior right now also and this is bad because it hardcodes the string 'off' to turn off the copytracing. On big repositories, copytracing is very slow and people wants to turn copytracing off. However if the user sets it to 'False', 'Off', '0', none of them is going to disbale copytracing while they should. I lacked the understanding of why this can be bad when I coded it. After this patch, the new behavior of the config option will be: * "heuristics": Fb's heuristic copytracing algorithm * '0', 'false', 'off', 'never', 'no', 'NO', all the values which repo.ui.configbool() evaluates to False: copytracing in turned off * something else: copytracing is on Since 'off' still evaluates to copytracing being turned off, this is not BC. Also the config option is experimental. Differential Revision: https://phab.mercurial-scm.org/D4416
author Pulkit Goyal <pulkit@yandex-team.ru>
date Wed, 29 Aug 2018 18:52:09 +0300
parents d0e8933d6dad
children aaad36b88298
line wrap: on
line source

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

from __future__ import absolute_import, print_function

import polib
import re
import sys

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)

        # merge comments from entry
        for comment in entry.comment.split('\n'):
            if comment and comment not in e.comment:
                if not e.comment:
                    e.comment = comment
                else:
                    e.comment += '\n' + comment
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::') # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end():].rstrip():
                        # only directive, nothing to translate here
                        delta += 2
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            delta += 2
                            continue
                        else:
                            # lines following directly, unexpected
                            print('Warning: text follows line with directive' \
                                  ' %s' % directive)
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()