view i18n/posplit @ 24834:6e31e1274080 stable

bundlerepo: use pathutil.normasprefix to ensure os.sep at the end of cwd Since Python 2.7.9, "os.path.join(path, '')" doesn't add "os.sep" at the end of a UNC path (see issue4557 for detail). This makes bundlerepo work incorrectly, if: 1. cwd is the root of a UNC share (e.g. "\\host\share"), and 2. mainreporoot is near cwd (e.g. "\\host\sharefoo\repo") - host of UNC path is same as one of cwd - share of UNC path starts with one of cwd 3. "repopath" isn't specified in bundle URI (e.g. "bundle:bundlefile" or just "bundlefile") For example: $ hg --cwd \\host\share -R \\host\sharefoo\repo incoming bundle In this case: - os.path.join(r"\\host\share", "") returns r"\\host\share", - r"\\host\sharefoo\repo".startswith(r"\\host\share") returns True, then - r"foo\repo" is treated as repopath of bundlerepo instead of r"\\host\sharefoo\repo" This causes failure of combining "\\host\sharefoo\repo" and bundle file: in addition to it, "\\host\share\foo\repo" may be combined with bundle file, if it accidentally exists. This patch uses "pathutil.normasprefix()" to ensure "os.sep" at the end of cwd safely, even with some problematic encodings, which use 0x5c (= "os.sep" on Windows) as the tail byte of some multi-byte characters. BTW, normalization before "pathutil.normasprefix()" isn't needed in this case, because "os.getcwd()" always returns a normalized one.
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
date Wed, 22 Apr 2015 23:38:55 +0900
parents e3ee7ec85a15
children a1924bc6e267
line wrap: on
line source

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import re
import sys
import polib

def addentry(po, entry, cache):
    """Append entry to po, or fold it into an entry with the same msgid.

    cache maps each msgid already appended to po to its entry. The first
    entry seen for a msgid is appended to po and recorded in cache. A
    later entry with the same msgid is not appended; instead its
    occurrences and any comment lines not yet present are merged into
    the recorded entry, so that hints attached to a duplicate paragraph
    are not lost.
    """
    existing = cache.get(entry.msgid)
    if existing is None:
        po.append(entry)
        cache[entry.msgid] = entry
        return

    existing.occurrences.extend(entry.occurrences)

    # merge comment lines from the duplicate, skipping ones already there
    for line in (entry.comment or '').split('\n'):
        if line and line not in (existing.comment or ''):
            if existing.comment:
                existing.comment += '\n' + line
            else:
                existing.comment = line

def mkentry(orig, delta, msgid, msgstr):
    """Return a new polib.POEntry derived from orig for one paragraph.

    The new entry is first merged from orig, then gets msgid and msgstr
    (falling back to orig's values when the given one is empty) and a
    copy of orig's occurrences with delta added to every line number.

    Occurrences whose line field is empty are kept unchanged instead of
    being passed to int(), which would raise ValueError. polib appears
    to record a reference without a line number that way - see the
    polib documentation.
    """
    def shifted(line):
        # no line number recorded: nothing to shift
        if line is None or line == '':
            return line
        return int(line) + delta

    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, shifted(l)) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    # Script entry point: split every message of the .po/.pot file named
    # by the first command line argument into one entry per paragraph
    # (paragraphs are separated by a blank line), then save the catalog
    # with po.save().
    if len(sys.argv) < 2:
        # fail with a usage hint instead of an IndexError traceback
        sys.exit('usage: posplit FILE')
    po = polib.pofile(sys.argv[1])

    cache = {}       # msgid -> entry already appended to po
    entries = po[:]  # snapshot of the original entries; po is rebuilt
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::') # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        # line offset of the current paragraph inside the original
        # msgid; added to the occurrence line numbers by mkentry
        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            # empty paragraphs and a bare '::' are not emitted
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end():].rstrip():
                        # only directive, nothing to translate here
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            continue
                        else:
                            # lines following directly, unexpected
                            # (single parenthesized argument: prints the
                            # same text under Python 2 and Python 3)
                            print('Warning: text follows line with directive'
                                  ' %s' % directive)
                    # tell the translator to leave the directive itself
                    # untouched, without repeating an existing hint
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            # skip this paragraph's lines plus the blank separator line
            delta += 2 + msgid.count('\n')
    po.save()