view i18n/posplit @ 48587:3c8cc987672e

simplemerge: take over formatting of label from `filemerge` The padding we do of conflict labels depends on which conflict marker style is used. For two-way conflict markers (the default), the length of the base label shouldn't matter. It does before this patch, however. This patch moves the formatting from `filemerge` to `simplemerge`. The latter knows which conflict marker style to use, so it can easily decide about the padding. This change will allow us to use more descriptive "base" labels without causing illogical padding in 2-way markers. I'll do that next. One wrinkle is that we pass the same labels to external merge tools. I decided to change that in this patch to be simpler: no padding, and no ellipsis to fit within 80 columns. My reasoning is that the typical external, 3-or-4-panel merge tool doesn't show the labels on top of each others, so the padding doesn't make sense there. The ellipsis is probably not necessary because the external tools probably have their own way of dealing with long labels. Also, we limit them to "80 - 8" to fit the "<<<<<<< " before, which is almost definitely not what an external tool would put there. Differential Revision: https://phab.mercurial-scm.org/D12019
author Martin von Zweigbergk <martinvonz@google.com>
date Thu, 20 Jan 2022 11:06:52 -0800
parents c102b704edb5
children 6000f5b25c9b
line wrap: on
line source

#!/usr/bin/env python3
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

from __future__ import absolute_import, print_function

import polib
import re
import sys


def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)

        # merge comments from entry
        for comment in entry.comment.split('\n'):
            if comment and comment not in e.comment:
                if not e.comment:
                    e.comment = comment
                else:
                    e.comment += '\n' + comment
    else:
        po.append(entry)
        cache[entry.msgid] = entry


def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry


if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::')  # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end() :].rstrip():
                        # only directive, nothing to translate here
                        delta += 2
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            delta += 2
                            continue
                        else:
                            # lines following directly, unexpected
                            print(
                                'Warning: text follows line with directive'
                                ' %s' % directive
                            )
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()