view tests/killdaemons.py @ 32001:c85f19c66e8d

tests: add tests for poorly behaving HTTP server I've spent several hours over the past few weeks investigating networking failures involving hg.mozilla.org. As part of this, it has become clear that the Mercurial client's error handling when it encounters network failures is far from robust. To prove this is true, I've devised a battery of tests simulating various network failures, notably premature connection closes. To achieve this, I've implemented an extension that monkeypatches the built-in HTTP server and hooks in at the socket level and allows various events to occur based on config options. For example, you can refuse to accept() a client socket or you can close() the socket after N bytes have been sent or received. The latter effectively simulates an unexpected connection drop (and these occur all the time in the real world). The new test file launches servers exhibiting various "bad" behaviors and points a client at them. As the many TODO comments in the test call attention to, Mercurial often displays unhelpful errors when network-related failures occur. This makes it difficult for users to understand what's going on and difficult for server administrators to pinpoint root causes without packet tracing. Upcoming patches will attempt to fix these error handling deficiencies.
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 13 Apr 2017 22:19:28 -0700
parents 4ddfb730789d
children f840b2621cce
line wrap: on
line source

#!/usr/bin/env python

from __future__ import absolute_import
import errno
import os
import signal
import sys
import time

if os.name =='nt':
    import ctypes

    def _check(ret, expectederr=None):
        if ret == 0:
            winerrno = ctypes.GetLastError()
            if winerrno == expectederr:
                return True
            raise ctypes.WinError(winerrno)

    def kill(pid, logfn, tryhard=True):
        """Terminate the daemon process ``pid`` using the Win32 API.

        pid     -- id of the process to terminate
        logfn   -- callable taking one string; receives progress messages
        tryhard -- accepted for signature parity with the POSIX variant; it
                   is currently unused here (see the TODO below)

        The process is opened, given 100 ms to exit by itself, and terminated
        with TerminateProcess() if it is still running. A second 100 ms wait
        then checks whether it is really gone; if not, a "stuck" message is
        logged. Returns None. A failing Win32 call raises the error built by
        ctypes.WinError() (via _check() for everything except the best-effort
        CloseHandle() on the error path).
        """
        logfn('# Killing daemon process %d' % pid)
        PROCESS_TERMINATE = 1
        PROCESS_QUERY_INFORMATION = 0x400
        SYNCHRONIZE = 0x00100000
        WAIT_OBJECT_0 = 0
        WAIT_TIMEOUT = 258
        kernel32 = ctypes.windll.kernel32
        handle = kernel32.OpenProcess(
                PROCESS_TERMINATE|SYNCHRONIZE|PROCESS_QUERY_INFORMATION,
                False, pid)
        if handle == 0:
            _check(0, 87) # err 87 when process not found
            return # process not found, already finished
        try:
            r = kernel32.WaitForSingleObject(handle, 100)
            if r == WAIT_OBJECT_0:
                pass # terminated, but process handle still available
            elif r == WAIT_TIMEOUT:
                _check(kernel32.TerminateProcess(handle, -1))
            else:
                # Any other wait result is a failure. The result itself is
                # non-zero, so handing it to _check() would be a silent
                # no-op; pass 0 to make _check() raise the last error.
                _check(0)

            # TODO?: forcefully kill when timeout
            #        and ?shorter waiting time? when tryhard==True
            r = kernel32.WaitForSingleObject(handle, 100) # timeout = 100 ms
            if r == WAIT_OBJECT_0:
                pass # process is terminated
            elif r == WAIT_TIMEOUT:
                logfn('# Daemon process %d is stuck' % pid)
            else:
                _check(0) # any error; raise from the last error code
        except: #re-raises
            kernel32.CloseHandle(handle) # no _check, keep error
            raise
        _check(kernel32.CloseHandle(handle))

else:
    def kill(pid, logfn, tryhard=True):
        """Terminate the daemon process ``pid`` with POSIX signals.

        pid     -- id of the process to terminate
        logfn   -- callable taking one string; receives progress messages
        tryhard -- when true, probe the process 10 times at 50 ms intervals
                   after SIGTERM; otherwise probe once after 100 ms

        SIGTERM is sent first. If every probe still finds the process, a
        "stuck" message is logged and SIGKILL is sent. As soon as any
        os.kill() call reports ESRCH (no such process) the function returns
        quietly; every other OSError propagates to the caller.
        """
        if tryhard:
            probes, pause = 10, 0.05
        else:
            probes, pause = 1, 0.1
        try:
            # signal 0 only tests for existence; ESRCH here means the
            # daemon is already gone and nothing gets logged
            os.kill(pid, 0)
            logfn('# Killing daemon process %d' % pid)
            os.kill(pid, signal.SIGTERM)
            for _ in range(probes):
                time.sleep(pause)
                os.kill(pid, 0)
            logfn('# Daemon process %d is stuck - really killing it' % pid)
            os.kill(pid, signal.SIGKILL)
        except OSError as exc:
            if exc.errno == errno.ESRCH:
                return
            raise

def killdaemons(pidfile, tryhard=True, remove=False, logfn=None):
    """Kill every daemon process whose pid is listed in ``pidfile``.

    pidfile -- path of a text file holding one pid per line
    tryhard -- forwarded unchanged to kill()
    remove  -- when true, delete ``pidfile`` after all lines were processed
    logfn   -- optional callable taking one string; receives progress
               messages. When omitted or falsy, messages are discarded.

    Lines that are not a positive integer are reported through ``logfn`` and
    skipped. An IOError (for instance because the pid file does not exist)
    is silently ignored. Returns None.
    """
    if not logfn:
        logfn = lambda s: s
    # Kill off any leftover daemon processes
    try:
        # The context manager guarantees the pid file is closed even when
        # kill() raises part-way through the list.
        with open(pidfile) as fp:
            for line in fp:
                try:
                    pid = int(line)
                    if pid <= 0:
                        raise ValueError
                except ValueError:
                    logfn('# Not killing daemon process %s - invalid pid'
                          % line.rstrip())
                    continue
                kill(pid, logfn, tryhard)
        # the file is closed by now, so it can be removed safely
        if remove:
            os.unlink(pidfile)
    except IOError:
        pass

if __name__ == '__main__':
    # Script entry point: the pid file is taken from the single command
    # line argument, or from $DAEMON_PIDS when no argument is given.
    cliargs = sys.argv[1:]
    if cliargs:
        # exactly one argument is accepted; more raise ValueError on unpack
        (path,) = cliargs
    else:
        path = os.environ["DAEMON_PIDS"]

    killdaemons(path)