Mercurial > hg
view i18n/posplit @ 12717:89df79b3c011 stable
convert/darcs: support changelogs with bytes 0x7F-0xFF (issue2411)
This is a followup to 4481f8a93c7a, which only fixed the conversion of
patches with UTF-8 metadata.
This patch allows a changelog to have any bytes with values
0x7F-0xFF. It parses the XML changelog as Latin-1 and uses
converter_source.recode() to decode the data as UTF-8/Latin-1.
Caveats:
- Since the convert extension doesn't provide any way to specify the
source encoding, users are still limited to UTF-8 and Latin-1.
- etree will still complain if the changelog has bytes with values
0x00-0x19. XML only allows printable characters.
author | Brodie Rao <brodie@bitheap.org> |
---|---|
date | Fri, 01 Oct 2010 10:15:04 -0500 |
parents | 4fd49329a1b5 |
children | ff6ab0b2ebf7 |
line wrap: on
line source
#!/usr/bin/env python # # posplit - split messages in paragraphs on .po/.pot files # # license: MIT/X11/Expat # import sys import polib def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: e.occurrences.extend(entry.occurrences) else: po.append(entry) cache[entry.msgid] = entry def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) entry.msgid = msgid or orig.msgid entry.msgstr = msgstr or orig.msgstr entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: msgstrs = entry.msgstr.split(u'\n\n') else: msgstrs = [u''] * len(msgids) if len(msgids) != len(msgstrs): # places the whole existing translation as a fuzzy # translation for each paragraph, to give the # translator a chance to recover part of the old # translation - erasing extra paragraphs is # probably better than retranslating all from start if 'fuzzy' not in entry.flags: entry.flags.append('fuzzy') msgstrs = [entry.msgstr] * len(msgids) delta = 0 for msgid, msgstr in zip(msgids, msgstrs): if msgid: newentry = mkentry(entry, delta, msgid, msgstr) addentry(po, newentry, cache) delta += 2 + msgid.count('\n') po.save()