lock: block signal interrupt while making a lock file
On Windows where symlink isn't supported, util.makelock() could leave an empty
file if interrupted immediately after os.open(). This empty lock never dies
as it has no process id recorded.
    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    # an interrupt may occur here
    os.write(ld, info)
    os.close(ld)
This was a long-standing bug of TortoiseHg which runs a command-server and
kills it by CTRL_C_EVENT, reported by random Windows users.
https://bitbucket.org/tortoisehg/thg/issues/4873/#comment-43591129
At first, I tried to fix makelock() to clean up a stale lock file, which
turned out to be hard because any instruction may be interrupted by a
signal.
    ld = None
    try:
        # CALL_FUNCTION # os.open(...)
        # an interrupt may occur here
        # STORE_FAST # ld = ...
        ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
        os.write(ld, info)
        ...
        return True
    except:
        if ld:
            ...
        os.unlink(pathname)
        return False
So I decided to block signals by temporarily replacing the signal handlers
so makelock() and held = 1 will never be interrupted.
Many thanks to Fernando Najera for investigating the issue.
--- a/mercurial/lock.py Sun Mar 04 09:40:12 2018 -0500
+++ b/mercurial/lock.py Sat Mar 03 23:49:39 2018 -0500
@@ -10,6 +10,7 @@
import contextlib
import errno
import os
+import signal
import socket
import time
import warnings
@@ -39,6 +40,64 @@
raise
return result
+@contextlib.contextmanager
+def _delayedinterrupt():
+ """Block signal interrupt while doing something critical
+
+ This makes sure that the code block wrapped by this context manager won't
+ be interrupted.
+
+ For Windows developers: It appears not possible to guard time.sleep()
+ from CTRL_C_EVENT, so please don't use time.sleep() to test if this is
+ working.
+ """
+ assertedsigs = []
+ blocked = False
+ orighandlers = {}
+
+ def raiseinterrupt(num):
+ if (num == getattr(signal, 'SIGINT', None) or
+ num == getattr(signal, 'CTRL_C_EVENT', None)):
+ raise KeyboardInterrupt
+ else:
+ raise error.SignalInterrupt
+ def catchterm(num, frame):
+ if blocked:
+ assertedsigs.append(num)
+ else:
+ raiseinterrupt(num)
+
+ try:
+ # save handlers first so they can be restored even if a setup is
+ # interrupted between signal.signal() and orighandlers[] =.
+ for name in ['CTRL_C_EVENT', 'SIGINT', 'SIGBREAK', 'SIGHUP', 'SIGTERM']:
+ num = getattr(signal, name, None)
+ if num and num not in orighandlers:
+ orighandlers[num] = signal.getsignal(num)
+ try:
+ for num in orighandlers:
+ signal.signal(num, catchterm)
+ except ValueError:
+ pass # in a thread? no luck
+
+ blocked = True
+ yield
+ finally:
+ # no simple way to reliably restore all signal handlers because
+ # any loops, recursive function calls, except blocks, etc. can be
+ # interrupted. so instead, make catchterm() raise interrupt.
+ blocked = False
+ try:
+ for num, handler in orighandlers.items():
+ signal.signal(num, handler)
+ except ValueError:
+ pass # in a thread?
+
+ # re-raise interrupt exception if any, which may be shadowed by a new
+ # interrupt occurred while re-raising the first one
+ if assertedsigs:
+ raiseinterrupt(assertedsigs[0])
+
def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs):
"""return an acquired lock or raise an a LockHeld exception
@@ -182,8 +241,9 @@
while not self.held and retry:
retry -= 1
try:
- self.vfs.makelock(lockname, self.f)
- self.held = 1
+ with _delayedinterrupt():
+ self.vfs.makelock(lockname, self.f)
+ self.held = 1
except (OSError, IOError) as why:
if why.errno == errno.EEXIST:
locker = self._readlock()
--- a/mercurial/util.py Sun Mar 04 09:40:12 2018 -0500
+++ b/mercurial/util.py Sat Mar 03 23:49:39 2018 -0500
@@ -1676,6 +1676,11 @@
timer = time.perf_counter
def makelock(info, pathname):
+ """Create a lock file atomically if possible
+
+ This may leave a stale lock file if symlink isn't supported and signal
+ interrupt is enabled.
+ """
try:
return os.symlink(info, pathname)
except OSError as why: