Mercurial > hg
view i18n/posplit @ 21812:73e4a02e6d23
hg: add support for HGUNICODEPEDANTRY environment variable
This lets us easily verify that there are no implicit conversions
between unicodes and bytes in Mercurial's codebase. Based on something
mpm did by hand periodically, but it kept regressing, so just open the
door to running it in a buildbot.
author | Augie Fackler <raf@durin42.com> |
---|---|
date | Mon, 23 Jun 2014 09:33:07 -0400 |
parents | e3ee7ec85a15 |
children | a1924bc6e267 |
line wrap: on
line source
#!/usr/bin/env python # # posplit - split messages in paragraphs on .po/.pot files # # license: MIT/X11/Expat # import re import sys import polib def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: e.occurrences.extend(entry.occurrences) else: po.append(entry) cache[entry.msgid] = entry def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) entry.msgid = msgid or orig.msgid entry.msgstr = msgstr or orig.msgstr entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] findd = re.compile(r' *\.\. (\w+)::') # for finding directives for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: msgstrs = entry.msgstr.split(u'\n\n') else: msgstrs = [u''] * len(msgids) if len(msgids) != len(msgstrs): # places the whole existing translation as a fuzzy # translation for each paragraph, to give the # translator a chance to recover part of the old # translation - erasing extra paragraphs is # probably better than retranslating all from start if 'fuzzy' not in entry.flags: entry.flags.append('fuzzy') msgstrs = [entry.msgstr] * len(msgids) delta = 0 for msgid, msgstr in zip(msgids, msgstrs): if msgid and msgid != '::': newentry = mkentry(entry, delta, msgid, msgstr) mdirective = findd.match(msgid) if mdirective: if not msgid[mdirective.end():].rstrip(): # only directive, nothing to translate here continue directive = mdirective.group(1) if directive in ('container', 'include'): if msgid.rstrip('\n').count('\n') == 0: # only rst syntax, nothing to translate continue else: # lines following directly, unexpected print 'Warning: text follows line with directive' \ ' %s' % directive comment = 'do not translate: .. %s::' % directive if not newentry.comment: newentry.comment = comment elif comment not in newentry.comment: newentry.comment += '\n' + comment addentry(po, newentry, cache) delta += 2 + msgid.count('\n') po.save()