i18n/posplit
author Martin von Zweigbergk <martinvonz@google.com>
Thu, 14 Sep 2017 11:16:57 -0700
branchstable
changeset 34144 91f0677dc920
parent 29153 90d84e1e427a
child 39269 d0e8933d6dad
permissions -rwxr-xr-x
repair: preserve phase also when not using generaldelta (issue5678) It seems like we used to pick the oldest possible version of the changegroup to use for bundles created by the repair module (used e.g. by "hg strip" and for temporary bundles by "hg rebase"). I tried to preserve that behavior when I created the changegroup.safeversion() method in 3b2ac2115464 (changegroup: introduce safeversion(), 2016-01-19). However, we have recently chagned our minds and decided that these commands are only used locally and downgrades are unlikely. That decicion allowed us to start adding obsmarker and phase information to these bundles. However, as the bug report shows, it means we get different behavior e.g. when generaldelta is not enabled (because when it was enabled, it forced us to use bundle2). The commit that actually caused the reported bug was 8e3021fd1a44 (strip: include phases in bundle (BC), 2017-06-15). So, since we now depend on having more information in the bundles, let's make sure we instead pick the newest possible changegroup version. Differential Revision: https://phab.mercurial-scm.org/D715

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

from __future__ import absolute_import, print_function

import polib
import re
import sys

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::') # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end():].rstrip():
                        # only directive, nothing to translate here
                        delta += 2
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            delta += 2
                            continue
                        else:
                            # lines following directly, unexpected
                            print('Warning: text follows line with directive' \
                                  ' %s' % directive)
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()