worker: rewrite error handling so os._exit covers all cases
Previously, the worker error handling looked like this:
    pid = os.fork()  --+
    if pid == 0:       |
        ....           | problematic
        ....         --+
        try:         --+
            ....       | worker error handling
                     --+
If a signal arrives while Python is executing the "problematic" lines, an
external error handler (dispatch.py) will take over the control flow, and
it is no longer guaranteed that "os._exit" is called (see 86cd09bc13ba for
why it is necessary).
This patch rewrites the error handling so it covers all possible code paths
for a worker even during fork.
Note: "os.getpid() == parentpid" is intentionally used to test whether the
process is the parent, instead of checking "pid", because "pid = os.fork()"
may not be atomic - it's possible that a signal hits the worker before the
assignment completes [1]. The newly added test replaces "os.fork" to
exercise that extreme case.
[1]: CPython compiles "pid = os.fork()" to two bytecodes, "CALL_FUNCTION"
and "STORE_FAST", so the assignment is probably not atomic:
    def f():
        pid = os.fork()
    dis.dis(f)

      2           0 LOAD_GLOBAL              0 (os)
                  3 LOAD_ATTR                1 (fork)
                  6 CALL_FUNCTION            0
                  9 STORE_FAST               0 (pid)
                 12 LOAD_CONST               0 (None)
                 15 RETURN_VALUE
#!/usr/bin/env python
"""
Utility for inspecting files in various ways.
This tool is like the collection of tools found in a Unix environment, but it
is cross-platform, stable, and suitable for our needs in the test suite.
This can be used instead of tools like:
[
dd
find
head
hexdump
ls
md5sum
readlink
sha1sum
stat
tail
test
readlink.py
md5sum.py
"""
from __future__ import absolute_import
import glob
import hashlib
import optparse
import os
import re
import sys
def visit(opts, filenames, outfile):
    """Process filenames in the way specified in opts, writing output to
    outfile.

    opts is an object exposing the attributes type, mode, links, size,
    newer, recurse, sha1, md5, dump, hexdump, bytes, lines and quiet (as
    produced by the option parser of this script).

    filenames is an iterable of paths; they are visited in sorted order.
    The special name '-' means "read the content from standard input".

    outfile is any object with a write() method.

    For every name a one-line summary of the requested "facts" is written,
    optionally followed by a hexdump and/or raw dump of the content.  With
    opts.recurse, directories are descended into recursively.
    """
    for f in sorted(filenames):
        isstdin = f == '-'
        if not isstdin and not os.path.lexists(f):
            outfile.write('%s: file not found\n' % f)
            continue
        # stdin never gets the '>>>'/'<<<' framing or a bare 'name:' header;
        # --quiet is ignored while recursing so that entries stay labelled.
        quiet = opts.quiet and not opts.recurse or isstdin
        isdir = os.path.isdir(f)
        islink = os.path.islink(f)
        isfile = os.path.isfile(f) and not islink
        dirfiles = None
        content = None
        facts = []
        if isfile:
            if opts.type:
                facts.append('file')
            # Only read the file when some option actually needs its bytes.
            # sha1 must be part of this test, otherwise --sha1 alone would
            # silently produce no hash for regular files.
            if opts.hexdump or opts.dump or opts.md5 or opts.sha1:
                with open(f, 'rb') as fobj:
                    content = fobj.read()
        elif islink:
            if opts.type:
                facts.append('link')
            # the "content" of a symlink is its target
            content = os.readlink(f)
        elif isstdin:
            content = sys.stdin.read()
            if opts.size:
                facts.append('size=%s' % len(content))
        elif isdir:
            if opts.recurse or opts.type:
                dirfiles = glob.glob(f + '/*')
                facts.append('directory with %s files' % len(dirfiles))
        elif opts.type:
            facts.append('type unknown')
        if not isstdin:
            # lstat: report on the link itself rather than on its target
            stat = os.lstat(f)
            if opts.size and not isdir:
                facts.append('size=%s' % stat.st_size)
            if opts.mode and not islink:
                facts.append('mode=%o' % (stat.st_mode & 0o777))
            if opts.links:
                facts.append('links=%s' % stat.st_nlink)
            if opts.newer:
                # mtime might be in whole seconds so newer file might be same
                if stat.st_mtime >= os.stat(opts.newer).st_mtime:
                    facts.append('newer than %s' % opts.newer)
                else:
                    facts.append('older than %s' % opts.newer)
        # opts.bytes, when given, also truncates the printed hex digests
        if opts.md5 and content is not None:
            h = hashlib.md5(content)
            facts.append('md5=%s' % h.hexdigest()[:opts.bytes])
        if opts.sha1 and content is not None:
            h = hashlib.sha1(content)
            facts.append('sha1=%s' % h.hexdigest()[:opts.bytes])
        if isstdin:
            outfile.write(', '.join(facts) + '\n')
        elif facts:
            outfile.write('%s: %s\n' % (f, ', '.join(facts)))
        elif not quiet:
            outfile.write('%s:\n' % f)
        if content is not None:
            chunk = content
            if not islink:
                # head/tail style selection: a positive count keeps the
                # beginning, a negative count keeps the end
                if opts.lines:
                    if opts.lines >= 0:
                        chunk = ''.join(chunk.splitlines(True)[:opts.lines])
                    else:
                        chunk = ''.join(chunk.splitlines(True)[opts.lines:])
                if opts.bytes:
                    if opts.bytes >= 0:
                        chunk = chunk[:opts.bytes]
                    else:
                        chunk = chunk[opts.bytes:]
            if opts.hexdump:
                # 16 bytes per row: offset, hex bytes, printable rendering
                for i in range(0, len(chunk), 16):
                    s = chunk[i:i + 16]
                    outfile.write('%04x: %-47s |%s|\n' %
                                  (i, ' '.join('%02x' % ord(c) for c in s),
                                   re.sub('[^ -~]', '.', s)))
            if opts.dump:
                if not quiet:
                    outfile.write('>>>\n')
                outfile.write(chunk)
                if not quiet:
                    if chunk.endswith('\n'):
                        outfile.write('<<<\n')
                    else:
                        outfile.write('\n<<< no trailing newline\n')
        if opts.recurse and dirfiles:
            assert not isstdin
            visit(opts, dirfiles, outfile)
if __name__ == "__main__":
    # Script entry point: declare the command-line switches, parse the
    # arguments and report on the named files (standard input, spelled '-',
    # when no file names are given).
    #
    # Each entry is (short flag, long flag, add_option keyword arguments);
    # the order here is the order shown in --help.
    optiontable = [
        ("-t", "--type", {"action": "store_true",
                          "help": "show file type (file or directory)"}),
        ("-m", "--mode", {"action": "store_true",
                          "help": "show file mode"}),
        ("-l", "--links", {"action": "store_true",
                           "help": "show number of links"}),
        ("-s", "--size", {"action": "store_true",
                          "help": "show size of file"}),
        ("-n", "--newer", {"action": "store",
                           "help": "check if file is newer (or same)"}),
        ("-r", "--recurse", {"action": "store_true",
                             "help": "recurse into directories"}),
        ("-S", "--sha1", {"action": "store_true",
                          "help": "show sha1 hash of the content"}),
        ("-M", "--md5", {"action": "store_true",
                         "help": "show md5 hash of the content"}),
        ("-D", "--dump", {"action": "store_true",
                          "help": "dump file content"}),
        ("-H", "--hexdump", {"action": "store_true",
                             "help": "hexdump file content"}),
        ("-B", "--bytes", {"type": "int",
                           "help": "number of characters to dump"}),
        ("-L", "--lines", {"type": "int",
                           "help": "number of lines to dump"}),
        ("-q", "--quiet", {"action": "store_true",
                           "help": "no default output"}),
    ]

    parser = optparse.OptionParser("%prog [options] [filenames]")
    for shortflag, longflag, settings in optiontable:
        parser.add_option(shortflag, longflag, **settings)

    opts, filenames = parser.parse_args(sys.argv[1:])
    # no positional arguments: fall back to reading standard input
    filenames = filenames or ['-']
    visit(opts, filenames, sys.stdout)