Mercurial > hg
view i18n/posplit @ 27475:a2e2a8fa5fd1
revlog: avoid string slice when decompressing u* chunks
Revlog chunks can be stored uncompressed. If the first byte of the
raw data is \0, we store the data as is. Else we prefix it with 'u'.
Before, we performed a string slice to strip out the 'u' prefix.
With this patch, we use a buffer to avoid an extra memory copy and
associated garbage collection overhead.
I was unable to verify any performance impact of this patch. For both
mozilla-central and the hg repos, the number of manifest revisions
with 'u' prefixes is very small - under 1%. So this change likely
isn't called enough to have an impact on manifest reading. However,
the reasoning behind this change is solid, so it should be safe.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sun, 20 Dec 2015 16:00:27 -0800 |
parents | e3ee7ec85a15 |
children | a1924bc6e267 |
line wrap: on
line source
#!/usr/bin/env python # # posplit - split messages in paragraphs on .po/.pot files # # license: MIT/X11/Expat # import re import sys import polib def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: e.occurrences.extend(entry.occurrences) else: po.append(entry) cache[entry.msgid] = entry def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) entry.msgid = msgid or orig.msgid entry.msgstr = msgstr or orig.msgstr entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] findd = re.compile(r' *\.\. (\w+)::') # for finding directives for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: msgstrs = entry.msgstr.split(u'\n\n') else: msgstrs = [u''] * len(msgids) if len(msgids) != len(msgstrs): # places the whole existing translation as a fuzzy # translation for each paragraph, to give the # translator a chance to recover part of the old # translation - erasing extra paragraphs is # probably better than retranslating all from start if 'fuzzy' not in entry.flags: entry.flags.append('fuzzy') msgstrs = [entry.msgstr] * len(msgids) delta = 0 for msgid, msgstr in zip(msgids, msgstrs): if msgid and msgid != '::': newentry = mkentry(entry, delta, msgid, msgstr) mdirective = findd.match(msgid) if mdirective: if not msgid[mdirective.end():].rstrip(): # only directive, nothing to translate here continue directive = mdirective.group(1) if directive in ('container', 'include'): if msgid.rstrip('\n').count('\n') == 0: # only rst syntax, nothing to translate continue else: # lines following directly, unexpected print 'Warning: text follows line with directive' \ ' %s' % directive comment = 'do not translate: .. %s::' % directive if not newentry.comment: newentry.comment = comment elif comment not in newentry.comment: newentry.comment += '\n' + comment addentry(po, newentry, cache) delta += 2 + msgid.count('\n') po.save()