i18n/posplit
author Brodie Rao <brodie@sf.io>
Sun, 17 Nov 2013 18:04:28 -0500
changeset 20179 5bb3826bdac4
parent 11389 4fd49329a1b5
child 20359 ff6ab0b2ebf7
permissions -rwxr-xr-x
revlog: read/cache chunks in fixed windows of 64 KB When reading a revlog chunk, instead of reading up to 64 KB ahead of the request offset and caching that, this change caches a fixed window before and after the requested data that falls on 64 KB boundaries. This increases cache hits when reading revlogs backwards. Running perfmoonwalk on the Mercurial repo (with almost 20,000 changesets) on Mac OS X with an SSD, before this change: $ hg perfmoonwalk ! wall 2.307994 comb 2.310000 user 2.120000 sys 0.190000 (best of 5) (Each run has 10,668 cache hits and 9,304 misses.) After this change: $ hg perfmoonwalk ! wall 1.814117 comb 1.810000 user 1.810000 sys 0.000000 (best of 6) (19,931 cache hits, 62 misses.) On a busy NFS share, before this change: $ hg perfmoonwalk ! wall 17.000034 comb 4.100000 user 3.270000 sys 0.830000 (best of 3) After: $ hg perfmoonwalk ! wall 1.746115 comb 1.670000 user 1.660000 sys 0.010000 (best of 5)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11389
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     1
#!/usr/bin/env python
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     2
#
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     3
# posplit - split messages in paragraphs on .po/.pot files
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     4
#
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     5
# license: MIT/X11/Expat
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     6
#
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     7
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     8
import sys
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
     9
import polib
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
    10
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
    11
def addentry(po, entry, cache):
    """Add ``entry`` to ``po``, folding repeated msgids together.

    ``cache`` maps every msgid added so far to the entry that carries
    it.  If the msgid of ``entry`` is already known, only its
    occurrences are kept: they are appended to the occurrences of the
    stored entry.  Otherwise ``entry`` itself is appended to ``po`` and
    recorded in ``cache``.
    """
    known = cache.get(entry.msgid)
    if not known:
        # first time this msgid shows up
        po.append(entry)
        cache[entry.msgid] = entry
        return
    known.occurrences.extend(entry.occurrences)
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
    18
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
    19
def mkentry(orig, delta, msgid, msgstr):
    """Build the entry for one paragraph split out of ``orig``.

    The result is a fresh ``polib.POEntry`` merged with ``orig``, whose
    msgid, msgstr and occurrences are then overridden:

    orig   -- the entry being split
    delta  -- number of lines added to every occurrence line number
    msgid  -- msgid of the paragraph (``orig.msgid`` is used if empty)
    msgstr -- translation of the paragraph (``orig.msgstr`` is used if
              empty)

    Occurrences whose line number cannot be converted to an integer
    (polib appears to store references without a line number with an
    empty string) are copied unchanged instead of aborting the whole
    run with a ValueError.
    """
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr

    shifted = []
    for path, lineno in orig.occurrences:
        try:
            lineno = int(lineno) + delta
        except (TypeError, ValueError):
            # no numeric line to shift: keep the reference as it is
            pass
        shifted.append((path, lineno))
    entry.occurrences = shifted
    return entry
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
    26
4fd49329a1b5 i18n: script for splitting large messages on .po/.pot files
Wagner Bruna <wbruna@yahoo.com>
parents:
diff changeset
    27
if __name__ == "__main__":
    # Split every message of the .po/.pot file named by the first
    # command line argument into one entry per paragraph (paragraphs
    # are separated by a blank line), then save the catalog.
    if len(sys.argv) < 2:
        # fail with a readable message instead of a bare IndexError
        sys.exit('usage: %s FILE' % sys.argv[0])
    po = polib.pofile(sys.argv[1])

    # msgid -> entry already added, so that identical paragraphs coming
    # from different messages end up in a single entry
    cache = {}
    # work on a copy of the entries and refill the catalog from scratch
    entries = po[:]
    po[:] = []
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            # untranslated message: every paragraph stays untranslated
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        # line offset of the current paragraph inside the original
        # message, used to adjust the occurrence line numbers
        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid:
                newentry = mkentry(entry, delta, msgid, msgstr)
                addentry(po, newentry, cache)
            # skip the lines of this paragraph plus the blank separator
            # line, even when the paragraph itself was empty
            delta += 2 + msgid.count('\n')
    # NOTE(review): save() is called without a path; presumably polib
    # writes back to the file the catalog was loaded from - confirm
    po.save()