changeset 36701:d77c3b023393

lock: block signal interrupt while making a lock file On Windows where symlink isn't supported, util.makelock() could leave an empty file if interrupted immediately after os.open(). This empty lock never dies as it has no process id recorded. ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL) # an interrupt may occur here os.write(ld, info) os.close(ld) This was a long-standing bug of TortoiseHg which runs a command-server and kills it by CTRL_C_EVENT, reported by random Windows users. https://bitbucket.org/tortoisehg/thg/issues/4873/#comment-43591129 At first, I tried to fix makelock() to clean up a stale lock file, which turned out to be hard because any instructions may be interrupted by a signal. ld = None try: # CALL_FUNCTION # os.open(...) # an interrupt may occur here # STORE_FAST # ld = ... ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL) os.write(ld, info) ... return True except: if ld: ... os.unlink(pathname) return False So I decided to block signals by temporarily replacing the signal handlers so makelcok() and held = 1 will never be interrupted. Many thanks to Fernando Najera for investigating the issue.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 03 Mar 2018 23:49:39 -0500
parents e437de3881c1
children f659a407e5ee
files mercurial/lock.py mercurial/util.py
diffstat 2 files changed, 67 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/lock.py	Sun Mar 04 09:40:12 2018 -0500
+++ b/mercurial/lock.py	Sat Mar 03 23:49:39 2018 -0500
@@ -10,6 +10,7 @@
 import contextlib
 import errno
 import os
+import signal
 import socket
 import time
 import warnings
@@ -39,6 +40,64 @@
                 raise
     return result
 
+@contextlib.contextmanager
+def _delayedinterrupt():
+    """Block signal interrupt while doing something critical
+
+    This makes sure that the code block wrapped by this context manager won't
+    be interrupted.
+
+    For Windows developers: It appears not possible to guard time.sleep()
+    from CTRL_C_EVENT, so please don't use time.sleep() to test if this is
+    working.
+    """
+    assertedsigs = []
+    blocked = False
+    orighandlers = {}
+
+    def raiseinterrupt(num):
+        if (num == getattr(signal, 'SIGINT', None) or
+            num == getattr(signal, 'CTRL_C_EVENT', None)):
+            raise KeyboardInterrupt
+        else:
+            raise error.SignalInterrupt
+    def catchterm(num, frame):
+        if blocked:
+            assertedsigs.append(num)
+        else:
+            raiseinterrupt(num)
+
+    try:
+        # save handlers first so they can be restored even if a setup is
+        # interrupted between signal.signal() and orighandlers[] =.
+        for name in ['CTRL_C_EVENT', 'SIGINT', 'SIGBREAK', 'SIGHUP', 'SIGTERM']:
+            num = getattr(signal, name, None)
+            if num and num not in orighandlers:
+                orighandlers[num] = signal.getsignal(num)
+        try:
+            for num in orighandlers:
+                signal.signal(num, catchterm)
+        except ValueError:
+            pass # in a thread? no luck
+
+        blocked = True
+        yield
+    finally:
+        # no simple way to reliably restore all signal handlers because
+        # any loops, recursive function calls, except blocks, etc. can be
+        # interrupted. so instead, make catchterm() raise interrupt.
+        blocked = False
+        try:
+            for num, handler in orighandlers.items():
+                signal.signal(num, handler)
+        except ValueError:
+            pass # in a thread?
+
+    # re-raise interrupt exception if any, which may be shadowed by a new
+    # interrupt occurred while re-raising the first one
+    if assertedsigs:
+        raiseinterrupt(assertedsigs[0])
+
 def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs):
     """return an acquired lock or raise an a LockHeld exception
 
@@ -182,8 +241,9 @@
         while not self.held and retry:
             retry -= 1
             try:
-                self.vfs.makelock(lockname, self.f)
-                self.held = 1
+                with _delayedinterrupt():
+                    self.vfs.makelock(lockname, self.f)
+                    self.held = 1
             except (OSError, IOError) as why:
                 if why.errno == errno.EEXIST:
                     locker = self._readlock()
--- a/mercurial/util.py	Sun Mar 04 09:40:12 2018 -0500
+++ b/mercurial/util.py	Sat Mar 03 23:49:39 2018 -0500
@@ -1676,6 +1676,11 @@
     timer = time.perf_counter
 
 def makelock(info, pathname):
+    """Create a lock file atomically if possible
+
+    This may leave a stale lock file if symlink isn't supported and signal
+    interrupt is enabled.
+    """
     try:
         return os.symlink(info, pathname)
     except OSError as why: