Mercurial > hg
view i18n/posplit @ 16955:92e1c64ba0d4
parsers: add a C function to pack the dirstate
This is about 9 times faster than the Python dirstate packing code.
The relatively small speedup is due to the poor locality and memory
access patterns caused by traversing dicts and other boxed Python
values.
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Wed, 30 May 2012 12:55:33 -0700 |
parents | 4fd49329a1b5 |
children | ff6ab0b2ebf7 |
line wrap: on
line source
#!/usr/bin/env python # # posplit - split messages in paragraphs on .po/.pot files # # license: MIT/X11/Expat # import sys import polib def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: e.occurrences.extend(entry.occurrences) else: po.append(entry) cache[entry.msgid] = entry def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) entry.msgid = msgid or orig.msgid entry.msgstr = msgstr or orig.msgstr entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: msgstrs = entry.msgstr.split(u'\n\n') else: msgstrs = [u''] * len(msgids) if len(msgids) != len(msgstrs): # places the whole existing translation as a fuzzy # translation for each paragraph, to give the # translator a chance to recover part of the old # translation - erasing extra paragraphs is # probably better than retranslating all from start if 'fuzzy' not in entry.flags: entry.flags.append('fuzzy') msgstrs = [entry.msgstr] * len(msgids) delta = 0 for msgid, msgstr in zip(msgids, msgstrs): if msgid: newentry = mkentry(entry, delta, msgid, msgstr) addentry(po, newentry, cache) delta += 2 + msgid.count('\n') po.save()