tests/common-pattern.py
author Gregory Szorc <gregory.szorc@gmail.com>
Wed, 03 Oct 2018 09:43:01 -0700
changeset 40030 62160d3077cd
parent 39722 4bd6e444c76f
child 40881 8695fbe17f7c
permissions -rw-r--r--
cborutil: change buffering strategy Profiling revealed that we were spending a lot of time on the line that was concatenating the old buffer with the incoming data when attempting to decode long byte strings, such as manifest revisions. Essentially, we were feeding N chunks of size len(X) << len(Y) into decode() and continuously allocating a new, larger buffer to hold the undecoded input. This created substantial memory churn and slowed down execution. Changing the code to aggregate pending chunks in a list until we have enough data to fully decode the next atom makes things much more efficient. I don't have exact data, but I recall the old code spending >1s on manifest fulltexts from the mozilla-unified repo. The new code doesn't significantly appear in profile output. Differential Revision: https://phab.mercurial-scm.org/D4854

# common patterns in test at can safely be replaced
from __future__ import absolute_import

import os

substitutions = [
    # list of possible compressions
    (br'(zstd,)?zlib,none,bzip2',
     br'$USUAL_COMPRESSIONS$'
    ),
    (br'=(zstd,)?zlib',
     br'=$BUNDLE2_COMPRESSIONS$'
    ),
    # capabilities sent through http
    (br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
     br'bookmarks%250A'
     br'changegroup%253D01%252C02%250A'
     br'digests%253Dmd5%252Csha1%252Csha512%250A'
     br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
     br'hgtagsfnodes%250A'
     br'listkeys%250A'
     br'phases%253Dheads%250A'
     br'pushkey%250A'
     br'remote-changegroup%253Dhttp%252Chttps%250A'
     br'rev-branch-cache%250A'
     br'stream%253Dv2',
     # (the replacement patterns)
     br'$USUAL_BUNDLE_CAPS$'
    ),
    (br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
     br'bookmarks%250A'
     br'changegroup%253D01%252C02%250A'
     br'digests%253Dmd5%252Csha1%252Csha512%250A'
     br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
     br'hgtagsfnodes%250A'
     br'listkeys%250A'
     br'phases%253Dheads%250A'
     br'pushkey%250A'
     br'remote-changegroup%253Dhttp%252Chttps',
     # (the replacement patterns)
     br'$USUAL_BUNDLE_CAPS_SERVER$'
     ),
    # bundle2 capabilities sent through ssh
    (br'bundle2=HG20%0A'
     br'bookmarks%0A'
     br'changegroup%3D01%2C02%0A'
     br'digests%3Dmd5%2Csha1%2Csha512%0A'
     br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
     br'hgtagsfnodes%0A'
     br'listkeys%0A'
     br'phases%3Dheads%0A'
     br'pushkey%0A'
     br'remote-changegroup%3Dhttp%2Chttps%0A'
     br'rev-branch-cache%0A'
     br'stream%3Dv2',
     # (replacement patterns)
     br'$USUAL_BUNDLE2_CAPS$'
    ),
    # bundle2 capabilities advertised by the server
    (br'bundle2=HG20%0A'
     br'bookmarks%0A'
     br'changegroup%3D01%2C02%0A'
     br'digests%3Dmd5%2Csha1%2Csha512%0A'
     br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
     br'hgtagsfnodes%0A'
     br'listkeys%0A'
     br'phases%3Dheads%0A'
     br'pushkey%0A'
     br'remote-changegroup%3Dhttp%2Chttps%0A'
     br'rev-branch-cache',
     # (replacement patterns)
     br'$USUAL_BUNDLE2_CAPS_SERVER$'
     ),
    (
     br'bundle2=HG20%0A'
     br'bookmarks%0A'
     br'changegroup%3D01%2C02%0A'
     br'digests%3Dmd5%2Csha1%2Csha512%0A'
     br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
     br'hgtagsfnodes%0A'
     br'listkeys%0A'
     br'pushkey%0A'
     br'remote-changegroup%3Dhttp%2Chttps%0A'
     br'rev-branch-cache%0A'
     br'stream%3Dv2',
     # (replacement patterns)
     br'$USUAL_BUNDLE2_CAPS_NO_PHASES$'
    ),
    # HTTP access log dates
    (br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] "(GET|PUT|POST)',
     lambda m: br' - - [$LOGDATE$] "' + m.group(1)
    ),
    # HTTP error log dates
    (br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] (HG error:|Exception)',
     lambda m: br' - - [$ERRDATE$] ' + m.group(1)
    ),
    # HTTP header dates- RFC 1123
    (br'([Dd]ate): [A-Za-z]{3}, \d\d [A-Za-z]{3} \d{4} \d\d:\d\d:\d\d GMT',
     lambda m: br'%s: $HTTP_DATE$' % m.group(1)
    ),
    # LFS expiration value
    (br'"expires_at": "\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ"',
     br'"expires_at": "$ISO_8601_DATE_TIME$"'
    ),
    # Windows has an extra '/' in the following lines that get globbed away:
    #   pushing to file:/*/$TESTTMP/r2 (glob)
    #   comparing with file:/*/$TESTTMP/r2 (glob)
    #   sub/maybelarge.dat: largefile 34..9c not available from
    #       file:/*/$TESTTMP/largefiles-repo (glob)
    (br'(.*file:/)/?(/\$TESTTMP.*)',
     lambda m: m.group(1) + b'*' + m.group(2) + b' (glob)'
    ),
]

# Various platform error strings, keyed on a common replacement string
_errors = {
    br'$ENOENT$': (
        # strerror()
        br'No such file or directory',

        # FormatMessage(ERROR_FILE_NOT_FOUND)
        br'The system cannot find the file specified',
    ),
    br'$ENOTDIR$': (
        # strerror()
        br'Not a directory',

        # FormatMessage(ERROR_PATH_NOT_FOUND)
        br'The system cannot find the path specified',
    ),
    br'$ECONNRESET$': (
        # strerror()
        br'Connection reset by peer',

        # FormatMessage(WSAECONNRESET)
        br'An existing connection was forcibly closed by the remote host',
    ),
    br'$EADDRINUSE$': (
        # strerror()
        br'Address already in use',

        # FormatMessage(WSAEADDRINUSE)
        br'Only one usage of each socket address'
        br' \(protocol/network address/port\) is normally permitted',
    ),
}

for replace, msgs in _errors.items():
    substitutions.extend((m, replace) for m in msgs)

# Output lines on Windows that can be autocorrected for '\' vs '/' path
# differences.
_winpathfixes = [
    # cloning subrepo s\ss from $TESTTMP/t/s/ss
    # cloning subrepo foo\bar from http://localhost:$HGPORT/foo/bar
    br'(?m)^cloning subrepo \S+\\.*',

    # pulling from $TESTTMP\issue1852a
    br'(?m)^pulling from \$TESTTMP\\.*',

    # pushing to $TESTTMP\a
    br'(?m)^pushing to \$TESTTMP\\.*',

    # pushing subrepo s\ss to $TESTTMP/t/s/ss
    br'(?m)^pushing subrepo \S+\\\S+ to.*',

    # moving d1\d11\a1 to d3/d11/a1
    br'(?m)^moving \S+\\.*',

    # d1\a: not recording move - dummy does not exist
    br'\S+\\\S+: not recording move .+',

    # reverting s\a
    br'(?m)^reverting (?!subrepo ).*\\.*',

    # saved backup bundle to
    #     $TESTTMP\test\.hg\strip-backup/443431ffac4f-2fc5398a-backup.hg
    br'(?m)^saved backup bundle to \$TESTTMP.*\.hg',

    # no changes made to subrepo s\ss since last push to ../tcc/s/ss
    br'(?m)^no changes made to subrepo \S+\\\S+ since.*',

    # changeset 5:9cc5aa7204f0: stuff/maybelarge.dat references missing
    #     $TESTTMP\largefiles-repo-hg\.hg\largefiles\76..38
    br'(?m)^changeset .* references (corrupted|missing) \$TESTTMP\\.*',

    # stuff/maybelarge.dat: largefile 76..38 not available from
    #     file:/*/$TESTTMP\largefiles-repo (glob)
    br'.*: largefile \S+ not available from file:/\*/.+',
]

if os.name == 'nt':
    substitutions.extend([(s, lambda match: match.group().replace(b'\\', b'/'))
                          for s in _winpathfixes])