tests/common-pattern.py
author Georges Racinet <georges.racinet@octobus.net>
Tue, 20 Jul 2021 17:20:19 +0200
changeset 47909 de2e04fe4897
parent 46672 7015b0232c5e
child 47860 8ae828fd008b
permissions -rw-r--r--
hgwebdir: avoid systematic full garbage collection Forcing a systematic full garbage collection upon each request can seriously harm performance. This is reported as https://bz.mercurial-scm.org/show_bug.cgi?id=6075 With this change we're performing the full collection according to a new setting, `experimental.web.full-garbage-collection-rate`. The default value is 1, which doesn't change the behavior and will allow us to test on real use cases. If the value is 0, no full garbage collection occurs. Regardless of the value of the setting, a partial garbage collection still occurs upon each request (not attempting to collect objects from the oldest generation). This should be enough to take care of reference cycles that have been created by the last request (assessment of this requires changing the setting, not to be 1). In my experience chasing memory leaks in Mercurial servers, the full collection never reclaimed any memory, but this is with Python 3 and biased towards small repositories. On the other hand, as explained in the Python developer docs [1], frequent full collections are very harmful in terms of performance if lots of objects survive the collection, and hence stay in the oldest generation. Note that `gc.collect()` is indeed trying to collect the oldest generation [2]. This happens usually in two cases: - unwanted lingering objects (i.e., an actual memory leak that the GC cannot do anything about). Sadly, we have lots of those these days. - desirable long-term objects, typically in caches (not inner caches carried by repositories, which should be collected with them). This is a subject of interest for the Heptapod project. In short, the flat rate that this change still permits is probably a bad idea in most cases, and the default value can be tweaked later on (or even be set to 0) according to experiments in the wild. 
The test is inspired by test-hgwebdir-paths.py [1] https://devguide.python.org/garbage_collector/#collecting-the-oldest-generation [2] https://docs.python.org/3/library/gc.html#gc.collect Differential Revision: https://phab.mercurial-scm.org/D11204

# common patterns in tests that can safely be replaced
from __future__ import absolute_import

import os

# Each entry is a ``(pattern, replacement)`` pair. ``pattern`` is a bytes
# regular expression; ``replacement`` is either a bytes literal or a callable
# that receives the match object and returns the replacement bytes.
# NOTE(review): presumably applied in list order by the test runner, which
# is why longer capability strings precede their shorter prefixes -- confirm.
substitutions = [
    # list of possible compressions
    (br'(zstd,)?zlib,none,bzip2', br'$USUAL_COMPRESSIONS$'),
    (br'=(zstd,)?zlib', br'=$BUNDLE2_COMPRESSIONS$'),
    # capabilities sent through http
    # (the bundle2 blob is URL-quoted twice here, hence %25xx everywhere)
    (
        br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
        br'bookmarks%250A'
        br'changegroup%253D01%252C02%250A'
        br'checkheads%253Drelated%250A'
        br'digests%253Dmd5%252Csha1%252Csha512%250A'
        br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
        br'hgtagsfnodes%250A'
        br'listkeys%250A'
        br'phases%253Dheads%250A'
        br'pushkey%250A'
        br'remote-changegroup%253Dhttp%252Chttps%250A'
        br'stream%253Dv2',
        # (the replacement patterns)
        br'$USUAL_BUNDLE_CAPS$',
    ),
    # same as above, minus the trailing "stream" capability
    (
        br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
        br'bookmarks%250A'
        br'changegroup%253D01%252C02%250A'
        # must be double-quoted like every other component of this
        # pattern, otherwise the pattern can never match
        br'checkheads%253Drelated%250A'
        br'digests%253Dmd5%252Csha1%252Csha512%250A'
        br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
        br'hgtagsfnodes%250A'
        br'listkeys%250A'
        br'phases%253Dheads%250A'
        br'pushkey%250A'
        br'remote-changegroup%253Dhttp%252Chttps',
        # (the replacement patterns)
        br'$USUAL_BUNDLE_CAPS_SERVER$',
    ),
    # bundle2 capabilities sent through ssh
    (
        br'bundle2=HG20%0A'
        br'bookmarks%0A'
        br'changegroup%3D01%2C02%0A'
        br'checkheads%3Drelated%0A'
        br'digests%3Dmd5%2Csha1%2Csha512%0A'
        br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
        br'hgtagsfnodes%0A'
        br'listkeys%0A'
        br'phases%3Dheads%0A'
        br'pushkey%0A'
        br'remote-changegroup%3Dhttp%2Chttps%0A'
        br'stream%3Dv2',
        # (replacement patterns)
        br'$USUAL_BUNDLE2_CAPS$',
    ),
    # bundle2 capabilities advertised by the server
    (
        br'bundle2=HG20%0A'
        br'bookmarks%0A'
        br'changegroup%3D01%2C02%0A'
        br'checkheads%3Drelated%0A'
        br'digests%3Dmd5%2Csha1%2Csha512%0A'
        br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
        br'hgtagsfnodes%0A'
        br'listkeys%0A'
        br'phases%3Dheads%0A'
        br'pushkey%0A'
        br'remote-changegroup%3Dhttp%2Chttps',
        # (replacement patterns)
        br'$USUAL_BUNDLE2_CAPS_SERVER$',
    ),
    # bundle2 capabilities without the "checkheads" and "phases" entries
    (
        br'bundle2=HG20%0A'
        br'bookmarks%0A'
        br'changegroup%3D01%2C02%0A'
        br'digests%3Dmd5%2Csha1%2Csha512%0A'
        br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
        br'hgtagsfnodes%0A'
        br'listkeys%0A'
        br'pushkey%0A'
        br'remote-changegroup%3Dhttp%2Chttps%0A'
        br'stream%3Dv2',
        # (replacement patterns)
        br'$USUAL_BUNDLE2_CAPS_NO_PHASES$',
    ),
    # HTTP access log dates
    # (the captured HTTP method is re-appended after the placeholder)
    (
        br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] "(GET|PUT|POST)',
        lambda m: br' - - [$LOGDATE$] "' + m.group(1),
    ),
    # HTTP error log dates
    # (the captured error prefix is re-appended after the placeholder)
    (
        br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] (HG error:|Exception)',
        lambda m: br' - - [$ERRDATE$] ' + m.group(1),
    ),
    # HTTP header dates- RFC 1123
    # (the header name is captured so its original casing is kept)
    (
        br'([Dd]ate): [A-Za-z]{3}, \d\d [A-Za-z]{3} \d{4} \d\d:\d\d:\d\d GMT',
        lambda m: br'%s: $HTTP_DATE$' % m.group(1),
    ),
    # LFS expiration value
    (
        br'"expires_at": "\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ"',
        br'"expires_at": "$ISO_8601_DATE_TIME$"',
    ),
    # Windows has an extra '/' in the following lines that get globbed away:
    #   pushing to file:/*/$TESTTMP/r2 (glob)
    #   comparing with file:/*/$TESTTMP/r2 (glob)
    #   sub/maybelarge.dat: largefile 34..9c not available from
    #       file:/*/$TESTTMP/largefiles-repo (glob)
    (
        br'(.*file:/)/?(/\$TESTTMP.*)',
        lambda m: m.group(1) + b'*' + m.group(2) + b' (glob)',
    ),
]

# Various platform error strings, keyed on a common replacement string
# Placeholder token -> tuple of bytes regex fragments, one per known
# platform-specific wording of the same error.
_errors = {
    br'$ENOENT$': (
        # Rust wording: it differs from the message carried by Python's
        # IOError, and cannot be converted automatically because of
        # module member privacy.
        br'No such file or directory \(os error 2\)',
        # wording from strerror()
        br'No such file or directory',
        # wording from FormatMessage(ERROR_FILE_NOT_FOUND)
        br'The system cannot find the file specified',
    ),
    br'$ENOTDIR$': (
        # wording from strerror()
        br'Not a directory',
        # wording from FormatMessage(ERROR_PATH_NOT_FOUND)
        br'The system cannot find the path specified',
    ),
    br'$ECONNRESET$': (
        # wording from strerror()
        br'Connection reset by peer',
        # wording from FormatMessage(WSAECONNRESET)
        br'An existing connection was forcibly closed by the remote host',
    ),
    br'$EADDRINUSE$': (
        # wording from strerror()
        br'Address already in use',
        # wording from FormatMessage(WSAEADDRINUSE)
        br'Only one usage of each socket address'
        br' \(protocol/network address/port\) is normally permitted',
    ),
    br'$EADDRNOTAVAIL$': (
        # wording from strerror()
        br'Cannot assign requested address',
        # no FormatMessage(WSAEADDRNOTAVAIL) wording is listed here
    ),
}

# Flatten the table: every wording becomes a (pattern, placeholder) pair
# appended to the substitution list.
for replace, msgs in _errors.items():
    substitutions.extend(zip(msgs, (replace,) * len(msgs)))

# Output lines on Windows that can be autocorrected for '\' vs '/' path
# differences.
# Bytes regexes selecting the output lines whose backslashes get rewritten
# to forward slashes; each is preceded by a sample of the line it targets.
_winpathfixes = [
    # sample: cloning subrepo s\ss from $TESTTMP/t/s/ss
    # sample: cloning subrepo foo\bar from http://localhost:$HGPORT/foo/bar
    br'(?m)^cloning subrepo \S+\\.*',
    # sample: pulling from $TESTTMP\issue1852a
    br'(?m)^pulling from \$TESTTMP\\.*',
    # sample: pushing to $TESTTMP\a
    br'(?m)^pushing to \$TESTTMP\\.*',
    # sample: pushing subrepo s\ss to $TESTTMP/t/s/ss
    br'(?m)^pushing subrepo \S+\\\S+ to.*',
    # sample: moving d1\d11\a1 to d3/d11/a1
    br'(?m)^moving \S+\\.*',
    # sample: d1\a: not recording move - dummy does not exist
    br'\S+\\\S+: not recording move .+',
    # sample: reverting s\a
    br'(?m)^reverting (?!subrepo ).*\\.*',
    # sample: saved backup bundle to
    #     $TESTTMP\test\.hg\strip-backup/443431ffac4f-2fc5398a-backup.hg
    br'(?m)^saved backup bundle to \$TESTTMP.*\.hg',
    # sample: no changes made to subrepo s\ss since last push to ../tcc/s/ss
    br'(?m)^no changes made to subrepo \S+\\\S+ since.*',
    # sample: changeset 5:9cc5aa7204f0: stuff/maybelarge.dat references
    #     missing $TESTTMP\largefiles-repo-hg\.hg\largefiles\76..38
    br'(?m)^changeset .* references (corrupted|missing) \$TESTTMP\\.*',
    # sample: stuff/maybelarge.dat: largefile 76..38 not available from
    #     file:/*/$TESTTMP\largefiles-repo (glob)
    br'.*: largefile \S+ not available from file:/\*/.+',
]


def _toslashes(match):
    """Return the whole matched text with each backslash turned into '/'."""
    return match.group().replace(b'\\', b'/')


# The path fixes are only registered when running on Windows.
if os.name == 'nt':
    substitutions.extend((fix, _toslashes) for fix in _winpathfixes)