view tests/filtertraceback.py @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object, instead, which has a(n amortized-)constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents fe044ce4bb17
children
line wrap: on
line source

#!/usr/bin/env python3

# Filters traceback lines from stdin.


import io
import sys

if sys.version_info[0] >= 3:
    # Prevent \r from being inserted on Windows.
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer,
        sys.stdout.encoding,
        sys.stdout.errors,
        newline="\n",
        line_buffering=sys.stdout.line_buffering,
    )

state = 'none'

for line in sys.stdin:
    if state == 'none':
        if line.startswith('Traceback '):
            state = 'tb'

    elif state == 'tb':
        if line.startswith('  File '):
            state = 'file'
            continue

        elif not line.startswith(' '):
            state = 'none'

        elif not line.replace('^', '').replace('~', '').strip():
            # PEP 657: Fine-grained error locations in tracebacks
            #                       ~~~~~~^^^^^^^^^
            continue

    elif state == 'file':
        # Ignore lines after "  File "
        state = 'tb'
        continue

    print(line, end='')