Mercurial > hg
view i18n/posplit @ 25343:7fbef7932af9
revset: optimize 'or' operation of trivial revisions to a list
As seen in issue4565 and issue4624, GUI wrappers and automated scripts are
likely to generate a long query that just has numeric revisions joined by 'or'.
One reason why is that they allows users to choose arbitrary revisions from
a list. Because this use case isn't handled well by smartset, let's optimize
it to a plain old list.
Benchmarks:
1) reduce nesting of chained 'or' operations
2) optimize to a list (this patch)
revset #0: 0 + 1 + 2 + ... + 1000
1) wall 0.483341 comb 0.480000 user 0.480000 sys 0.000000 (best of 20)
2) wall 0.025393 comb 0.020000 user 0.020000 sys 0.000000 (best of 107)
revset #1: sort(0 + 1 + 2 + ... + 1000)
1) wall 0.035240 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
2) wall 0.026432 comb 0.030000 user 0.030000 sys 0.000000 (best of 102)
revset #2: first(0 + 1 + 2 + ... + 1000)
1) wall 0.028949 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
2) wall 0.025503 comb 0.030000 user 0.030000 sys 0.000000 (best of 106)
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 17 May 2015 15:11:38 +0900 |
parents | e3ee7ec85a15 |
children | a1924bc6e267 |
line wrap: on
line source
#!/usr/bin/env python # # posplit - split messages in paragraphs on .po/.pot files # # license: MIT/X11/Expat # import re import sys import polib def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: e.occurrences.extend(entry.occurrences) else: po.append(entry) cache[entry.msgid] = entry def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) entry.msgid = msgid or orig.msgid entry.msgstr = msgstr or orig.msgstr entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] findd = re.compile(r' *\.\. (\w+)::') # for finding directives for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: msgstrs = entry.msgstr.split(u'\n\n') else: msgstrs = [u''] * len(msgids) if len(msgids) != len(msgstrs): # places the whole existing translation as a fuzzy # translation for each paragraph, to give the # translator a chance to recover part of the old # translation - erasing extra paragraphs is # probably better than retranslating all from start if 'fuzzy' not in entry.flags: entry.flags.append('fuzzy') msgstrs = [entry.msgstr] * len(msgids) delta = 0 for msgid, msgstr in zip(msgids, msgstrs): if msgid and msgid != '::': newentry = mkentry(entry, delta, msgid, msgstr) mdirective = findd.match(msgid) if mdirective: if not msgid[mdirective.end():].rstrip(): # only directive, nothing to translate here continue directive = mdirective.group(1) if directive in ('container', 'include'): if msgid.rstrip('\n').count('\n') == 0: # only rst syntax, nothing to translate continue else: # lines following directly, unexpected print 'Warning: text follows line with directive' \ ' %s' % directive comment = 'do not translate: .. %s::' % directive if not newentry.comment: newentry.comment = comment elif comment not in newentry.comment: newentry.comment += '\n' + comment addentry(po, newentry, cache) delta += 2 + msgid.count('\n') po.save()