i18n/posplit
author Matt Harbison <matt_harbison@yahoo.com>
Thu, 26 Sep 2024 18:15:36 -0400
changeset 51923 b455dfddfed0
parent 51863 f4733654f144
permissions -rwxr-xr-x
interfaces: convert the zope `Attribute` attrs to regular fields At this point, we should have a useful protocol class. The file syntax requires the type to be supplied for any fields that are declared, but we'll leave the complex ones partially unspecified for now, for simplicity. (Also, the things documented as `Callable` are really as future type annotating worked showed- roll with it for now, but they're marked as TODO for fixing later.) All of the fields and all of the attrs will need type annotations, or the type rules say they are considered to be `Any`. That can be done in a separate pass, possibly applying the `dirstate.pyi` file generated from the concrete class. The first cut of this turned the `interfaceutil.Attribute` fields into plain fields, and thus the types on them. PyCharm flagged a few things as having incompatible signatures when the concrete dirstate class subclassed this, when the concrete class has them declared as `@property`. So they've been changed to `@property` here in those cases. The remaining fields that are decorated in the concrete class have comments noting the differences. We'll see if they need to be changed going forward, but leave them for now. We'll be in trouble if the `@util.propertycache` is needed, because we can't import that module here at runtime, due to circular imports.

#!/usr/bin/env python3
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

from __future__ import annotations

import polib
import re
import sys


def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)

        # merge comments from entry
        for comment in entry.comment.split('\n'):
            if comment and comment not in e.comment:
                if not e.comment:
                    e.comment = comment
                else:
                    e.comment += '\n' + comment
    else:
        po.append(entry)
        cache[entry.msgid] = entry


def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry


if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::')  # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end() :].rstrip():
                        # only directive, nothing to translate here
                        delta += 2
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            delta += 2
                            continue
                        else:
                            # lines following directly, unexpected
                            print(
                                'Warning: text follows line with directive'
                                ' %s' % directive
                            )
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()