i18n/posplit
author Matt Harbison <matt_harbison@yahoo.com>
Thu, 19 Jul 2012 11:12:05 -0400
branchstable
changeset 17231 2446b63c89ec
parent 11389 4fd49329a1b5
child 20359 ff6ab0b2ebf7
permissions -rwxr-xr-x
largefiles: fix a traceback when addremove follows a remove (issue3507) The problem only occurred if a file was removed with 'hg rm' (as opposed to the OS utilities), and then addremove was run before a commit. Both normal and large files were affected. Ensuring that the file exists prior to an lstat() for size seems like the Right Thing. But oddly enough, the missing file that was causing lstat() to blow up was a standin when a largefile was removed, which seems fishy, because a standin should never be added as a largefile. I was then able to get a standin added as a largefile (whose name is 'large') with hg addremove --config largefiles.patterns=**large which also causes a backtrace. That will be fixed next.

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import sys
import polib

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid:
                newentry = mkentry(entry, delta, msgid, msgstr)
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()