view i18n/posplit @ 24305:867c3649be5d

cvsps: use a different tiebreaker to avoid flaky test After adding some sneaky debug printing[0], I determined that this test flaked when a CVS commit containing two files starts too close to the end of a second, thus putting file "a" in one second and "b/c" in the following second. The secondary sort key meant that these changes sorted in a different order when the timestamps were different than they did when they matched. As far as I can tell, CVS walks through the files in a stable order, so by sorting on the filenames in cvsps we'll get stable output. It's fine for us to switch from sorting on the branchpoint as a secondary key because this was already the point when we didn't care, and we're just trying to break ties in a stable way. It's unclear to be if having the branchpoint present matters anymore, but it doesn't really hurt to leave it. With this change in place, I was able to run test-convert-cvs over 650 times in a row without a failure. test-convert-cvcs-synthetic.t appears to still be flaky, but I don't think it's *worse* than it was before - just not better (I observed one flaky failure in 200 runs on that test). 0: The helpful debug hack ended up being this, in case it's useful to future flaky test assassins: --- a/hgext/convert/cvsps.py +++ b/hgext/convert/cvsps.py @@ -854,6 +854,8 @@ def debugcvsps(ui, *args, **opts): ui.write(('Branch: %s\n' % (cs.branch or 'HEAD'))) ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1], ','.join(cs.tags) or '(none)'))) + if cs.comment == 'ci1' and (cs.id == 6) == bool(cs.branchpoints): + ui.write('raw timestamp %r\n' % (cs.date,)) if cs.branchpoints: ui.write(('Branchpoints: %s \n') % ', '.join(sorted(cs.branchpoints)))
author Augie Fackler <raf@durin42.com>
date Fri, 13 Mar 2015 14:20:13 -0400
parents e3ee7ec85a15
children a1924bc6e267
line wrap: on
line source

#!/usr/bin/env python
#
# posplit - split messages in paragraphs on .po/.pot files
#
# license: MIT/X11/Expat
#

import re
import sys
import polib

def addentry(po, entry, cache):
    e = cache.get(entry.msgid)
    if e:
        e.occurrences.extend(entry.occurrences)
    else:
        po.append(entry)
        cache[entry.msgid] = entry

def mkentry(orig, delta, msgid, msgstr):
    entry = polib.POEntry()
    entry.merge(orig)
    entry.msgid = msgid or orig.msgid
    entry.msgstr = msgstr or orig.msgstr
    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
    return entry

if __name__ == "__main__":
    po = polib.pofile(sys.argv[1])

    cache = {}
    entries = po[:]
    po[:] = []
    findd = re.compile(r' *\.\. (\w+)::') # for finding directives
    for entry in entries:
        msgids = entry.msgid.split(u'\n\n')
        if entry.msgstr:
            msgstrs = entry.msgstr.split(u'\n\n')
        else:
            msgstrs = [u''] * len(msgids)

        if len(msgids) != len(msgstrs):
            # places the whole existing translation as a fuzzy
            # translation for each paragraph, to give the
            # translator a chance to recover part of the old
            # translation - erasing extra paragraphs is
            # probably better than retranslating all from start
            if 'fuzzy' not in entry.flags:
                entry.flags.append('fuzzy')
            msgstrs = [entry.msgstr] * len(msgids)

        delta = 0
        for msgid, msgstr in zip(msgids, msgstrs):
            if msgid and msgid != '::':
                newentry = mkentry(entry, delta, msgid, msgstr)
                mdirective = findd.match(msgid)
                if mdirective:
                    if not msgid[mdirective.end():].rstrip():
                        # only directive, nothing to translate here
                        continue
                    directive = mdirective.group(1)
                    if directive in ('container', 'include'):
                        if msgid.rstrip('\n').count('\n') == 0:
                            # only rst syntax, nothing to translate
                            continue
                        else:
                            # lines following directly, unexpected
                            print 'Warning: text follows line with directive' \
                                  ' %s' % directive
                    comment = 'do not translate: .. %s::' % directive
                    if not newentry.comment:
                        newentry.comment = comment
                    elif comment not in newentry.comment:
                        newentry.comment += '\n' + comment
                addentry(po, newentry, cache)
            delta += 2 + msgid.count('\n')
    po.save()