comparison mercurial/lock.py @ 36701:d77c3b023393

lock: block signal interrupt while making a lock file

On Windows where symlink isn't supported, util.makelock() could leave an empty file if interrupted immediately after os.open(). This empty lock never dies as it has no process id recorded.

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    # an interrupt may occur here
    os.write(ld, info)
    os.close(ld)

This was a long-standing bug of TortoiseHg which runs a command-server and kills it by CTRL_C_EVENT, reported by random Windows users.

https://bitbucket.org/tortoisehg/thg/issues/4873/#comment-43591129

At first, I tried to fix makelock() to clean up a stale lock file, which turned out to be hard because any instructions may be interrupted by a signal.

    ld = None
    try:
        # CALL_FUNCTION  # os.open(...)
        # an interrupt may occur here
        # STORE_FAST     # ld = ...
        ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
        os.write(ld, info)
        ...
        return True
    except:
        if ld:
            ...
        os.unlink(pathname)
        return False

So I decided to block signals by temporarily replacing the signal handlers so makelock() and held = 1 will never be interrupted.

Many thanks to Fernando Najera for investigating the issue.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 03 Mar 2018 23:49:39 -0500
parents 214f61abd865
children a8a902d7176e
comparison
equal deleted inserted replaced
36700:e437de3881c1 36701:d77c3b023393
8 from __future__ import absolute_import 8 from __future__ import absolute_import
9 9
10 import contextlib 10 import contextlib
11 import errno 11 import errno
12 import os 12 import os
13 import signal
13 import socket 14 import socket
14 import time 15 import time
15 import warnings 16 import warnings
16 17
17 from .i18n import _ 18 from .i18n import _
36 result += '/%x' % os.stat('/proc/self/ns/pid').st_ino 37 result += '/%x' % os.stat('/proc/self/ns/pid').st_ino
37 except OSError as ex: 38 except OSError as ex:
38 if ex.errno not in (errno.ENOENT, errno.EACCES, errno.ENOTDIR): 39 if ex.errno not in (errno.ENOENT, errno.EACCES, errno.ENOTDIR):
39 raise 40 raise
40 return result 41 return result
42
@contextlib.contextmanager
def _delayedinterrupt():
    """Block signal interrupt while doing something critical

    This makes sure that the code block wrapped by this context manager won't
    be interrupted.

    While the wrapped block runs, the handlers of CTRL_C_EVENT, SIGINT,
    SIGBREAK, SIGHUP and SIGTERM are replaced by one that only records the
    signal number. Names the signal module does not define, or whose value
    is 0, are skipped, and so are signals whose current handler was not
    installed from Python (they could not be restored afterwards). On exit
    the saved handlers are put back and the first recorded signal, if any,
    is re-raised: KeyboardInterrupt for SIGINT/CTRL_C_EVENT,
    error.SignalInterrupt for anything else.

    If signal.signal() raises ValueError (see the "in a thread?" notes
    below), the failure is ignored and the block runs unprotected.

    For Windows developers: It appears not possible to guard time.sleep()
    from CTRL_C_EVENT, so please don't use time.sleep() to test if this is
    working.
    """
    assertedsigs = []   # signal numbers received while blocked, in order
    blocked = False     # only read by catchterm(), so no rebinding needed
    orighandlers = {}   # signal number -> handler to restore on exit

    def raiseinterrupt(num):
        # translate a signal number into the exception callers expect
        if (num == getattr(signal, 'SIGINT', None) or
            num == getattr(signal, 'CTRL_C_EVENT', None)):
            raise KeyboardInterrupt
        else:
            raise error.SignalInterrupt

    def catchterm(num, frame):
        # temporary handler: defer the signal while blocked, otherwise
        # behave like an ordinary interrupting handler
        if blocked:
            assertedsigs.append(num)
        else:
            raiseinterrupt(num)

    try:
        # save handlers first so they can be restored even if a setup is
        # interrupted between signal.signal() and orighandlers[] =.
        for name in ['CTRL_C_EVENT', 'SIGINT', 'SIGBREAK', 'SIGHUP', 'SIGTERM']:
            num = getattr(signal, name, None)
            if num and num not in orighandlers:
                handler = signal.getsignal(num)
                # getsignal() returns None for a handler that was not
                # installed from Python. signal.signal(num, None) raises
                # TypeError, so such a handler could never be restored;
                # leave that signal untouched instead of failing on exit.
                if handler is not None:
                    orighandlers[num] = handler
        try:
            for num in orighandlers:
                signal.signal(num, catchterm)
        except ValueError:
            pass # in a thread? no luck

        blocked = True
        yield
    finally:
        # no simple way to reliably restore all signal handlers because
        # any loops, recursive function calls, except blocks, etc. can be
        # interrupted. so instead, make catchterm() raise interrupt.
        blocked = False
        try:
            for num, handler in orighandlers.items():
                signal.signal(num, handler)
        except ValueError:
            pass # in a thread?

    # re-raise interrupt exception if any, which may be shadowed by a new
    # interrupt occurred while re-raising the first one
    if assertedsigs:
        raiseinterrupt(assertedsigs[0])
41 100
42 def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs): 101 def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs):
43 """return an acquired lock or raise an a LockHeld exception 102 """return an acquired lock or raise an a LockHeld exception
44 103
45 This function is responsible to issue warnings and or debug messages about 104 This function is responsible to issue warnings and or debug messages about
180 lockname = '%s:%d' % (lock._host, self.pid) 239 lockname = '%s:%d' % (lock._host, self.pid)
181 retry = 5 240 retry = 5
182 while not self.held and retry: 241 while not self.held and retry:
183 retry -= 1 242 retry -= 1
184 try: 243 try:
185 self.vfs.makelock(lockname, self.f) 244 with _delayedinterrupt():
186 self.held = 1 245 self.vfs.makelock(lockname, self.f)
246 self.held = 1
187 except (OSError, IOError) as why: 247 except (OSError, IOError) as why:
188 if why.errno == errno.EEXIST: 248 if why.errno == errno.EEXIST:
189 locker = self._readlock() 249 locker = self._readlock()
190 if locker is None: 250 if locker is None:
191 continue 251 continue