cvsps: use a different tiebreaker to avoid flaky test
After adding some sneaky debug printing[0], I determined that this
test flaked when a CVS commit containing two files starts too close to
the end of a second, thus putting file "a" in one second and "b/c" in
the following second. The secondary sort key meant that these changes
sorted in a different order when the timestamps were different than
they did when they matched. As far as I can tell, CVS walks through
the files in a stable order, so by sorting on the filenames in cvsps
we'll get stable output. It's fine for us to switch from sorting on
the branchpoint as a secondary key because this was already the point
when we didn't care, and we're just trying to break ties in a stable
way. It's unclear to be if having the branchpoint present matters
anymore, but it doesn't really hurt to leave it.
With this change in place, I was able to run test-convert-cvs over 650
times in a row without a failure. test-convert-cvcs-synthetic.t
appears to still be flaky, but I don't think it's *worse* than it was
before - just not better (I observed one flaky failure in 200 runs on
that test).
0: The helpful debug hack ended up being this, in case it's useful to
future flaky test assassins:
--- a/hgext/convert/cvsps.py
+++ b/hgext/convert/cvsps.py
@@ -854,6 +854,8 @@ def debugcvsps(ui, *args, **opts):
ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
','.join(cs.tags) or '(none)')))
+ if cs.comment == 'ci1' and (cs.id == 6) == bool(cs.branchpoints):
+ ui.write('raw timestamp %r\n' % (cs.date,))
if cs.branchpoints:
ui.write(('Branchpoints: %s \n') %
', '.join(sorted(cs.branchpoints)))
#!/usr/bin/env python
import os, sys, time, errno, signal
if os.name =='nt':
import ctypes
def _check(ret, expectederr=None):
if ret == 0:
winerrno = ctypes.GetLastError()
if winerrno == expectederr:
return True
raise ctypes.WinError(winerrno)
def kill(pid, logfn, tryhard=True):
logfn('# Killing daemon process %d' % pid)
PROCESS_TERMINATE = 1
PROCESS_QUERY_INFORMATION = 0x400
SYNCHRONIZE = 0x00100000
WAIT_OBJECT_0 = 0
WAIT_TIMEOUT = 258
handle = ctypes.windll.kernel32.OpenProcess(
PROCESS_TERMINATE|SYNCHRONIZE|PROCESS_QUERY_INFORMATION,
False, pid)
if handle == 0:
_check(0, 87) # err 87 when process not found
return # process not found, already finished
try:
r = ctypes.windll.kernel32.WaitForSingleObject(handle, 100)
if r == WAIT_OBJECT_0:
pass # terminated, but process handle still available
elif r == WAIT_TIMEOUT:
_check(ctypes.windll.kernel32.TerminateProcess(handle, -1))
else:
_check(r)
# TODO?: forcefully kill when timeout
# and ?shorter waiting time? when tryhard==True
r = ctypes.windll.kernel32.WaitForSingleObject(handle, 100)
# timeout = 100 ms
if r == WAIT_OBJECT_0:
pass # process is terminated
elif r == WAIT_TIMEOUT:
logfn('# Daemon process %d is stuck')
else:
_check(r) # any error
except: #re-raises
ctypes.windll.kernel32.CloseHandle(handle) # no _check, keep error
raise
_check(ctypes.windll.kernel32.CloseHandle(handle))
else:
def kill(pid, logfn, tryhard=True):
try:
os.kill(pid, 0)
logfn('# Killing daemon process %d' % pid)
os.kill(pid, signal.SIGTERM)
if tryhard:
for i in range(10):
time.sleep(0.05)
os.kill(pid, 0)
else:
time.sleep(0.1)
os.kill(pid, 0)
logfn('# Daemon process %d is stuck - really killing it' % pid)
os.kill(pid, signal.SIGKILL)
except OSError, err:
if err.errno != errno.ESRCH:
raise
def killdaemons(pidfile, tryhard=True, remove=False, logfn=None):
if not logfn:
logfn = lambda s: s
# Kill off any leftover daemon processes
try:
fp = open(pidfile)
for line in fp:
try:
pid = int(line)
except ValueError:
continue
kill(pid, logfn, tryhard)
fp.close()
if remove:
os.unlink(pidfile)
except IOError:
pass
if __name__ == '__main__':
path, = sys.argv[1:]
killdaemons(path)