Mercurial > hg
comparison mercurial/lock.py @ 36701:d77c3b023393
lock: block signal interrupt while making a lock file
On Windows where symlink isn't supported, util.makelock() could leave an empty
file if interrupted immediately after os.open(). This empty lock never dies
as it has no process id recorded.
ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
# an interrupt may occur here
os.write(ld, info)
os.close(ld)
This was a long-standing bug of TortoiseHg which runs a command-server and
kills it by CTRL_C_EVENT, reported by random Windows users.
https://bitbucket.org/tortoisehg/thg/issues/4873/#comment-43591129
At first, I tried to fix makelock() to clean up a stale lock file, which
turned out to be hard because any instruction may be interrupted by a
signal.
ld = None
try:
# CALL_FUNCTION # os.open(...)
# an interrupt may occur here
# STORE_FAST # ld = ...
ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
os.write(ld, info)
...
return True
except:
if ld:
...
os.unlink(pathname)
return False
So I decided to block signals by temporarily replacing the signal handlers
so that makelock() and held = 1 will never be interrupted.
Many thanks to Fernando Najera for investigating the issue.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 03 Mar 2018 23:49:39 -0500 |
parents | 214f61abd865 |
children | a8a902d7176e |
comparison
equal
deleted
inserted
replaced
36700:e437de3881c1 | 36701:d77c3b023393 |
---|---|
8 from __future__ import absolute_import | 8 from __future__ import absolute_import |
9 | 9 |
10 import contextlib | 10 import contextlib |
11 import errno | 11 import errno |
12 import os | 12 import os |
13 import signal | |
13 import socket | 14 import socket |
14 import time | 15 import time |
15 import warnings | 16 import warnings |
16 | 17 |
17 from .i18n import _ | 18 from .i18n import _ |
36 result += '/%x' % os.stat('/proc/self/ns/pid').st_ino | 37 result += '/%x' % os.stat('/proc/self/ns/pid').st_ino |
37 except OSError as ex: | 38 except OSError as ex: |
38 if ex.errno not in (errno.ENOENT, errno.EACCES, errno.ENOTDIR): | 39 if ex.errno not in (errno.ENOENT, errno.EACCES, errno.ENOTDIR): |
39 raise | 40 raise |
40 return result | 41 return result |
42 | |
43 @contextlib.contextmanager | |
44 def _delayedinterrupt(): | |
45 """Block signal interrupt while doing something critical | |
46 | |
47 This makes sure that the code block wrapped by this context manager won't | |
48 be interrupted. | |
49 | |
50 For Windows developers: It appears not possible to guard time.sleep() | |
51 from CTRL_C_EVENT, so please don't use time.sleep() to test if this is | |
52 working. | |
53 """ | |
54 assertedsigs = [] | |
55 blocked = False | |
56 orighandlers = {} | |
57 | |
58 def raiseinterrupt(num): | |
59 if (num == getattr(signal, 'SIGINT', None) or | |
60 num == getattr(signal, 'CTRL_C_EVENT', None)): | |
61 raise KeyboardInterrupt | |
62 else: | |
63 raise error.SignalInterrupt | |
64 def catchterm(num, frame): | |
65 if blocked: | |
66 assertedsigs.append(num) | |
67 else: | |
68 raiseinterrupt(num) | |
69 | |
70 try: | |
71 # save handlers first so they can be restored even if a setup is | |
72 # interrupted between signal.signal() and orighandlers[] =. | |
73 for name in ['CTRL_C_EVENT', 'SIGINT', 'SIGBREAK', 'SIGHUP', 'SIGTERM']: | |
74 num = getattr(signal, name, None) | |
75 if num and num not in orighandlers: | |
76 orighandlers[num] = signal.getsignal(num) | |
77 try: | |
78 for num in orighandlers: | |
79 signal.signal(num, catchterm) | |
80 except ValueError: | |
81 pass # in a thread? no luck | |
82 | |
83 blocked = True | |
84 yield | |
85 finally: | |
86 # no simple way to reliably restore all signal handlers because | |
87 # any loops, recursive function calls, except blocks, etc. can be | |
88 # interrupted. so instead, make catchterm() raise interrupt. | |
89 blocked = False | |
90 try: | |
91 for num, handler in orighandlers.items(): | |
92 signal.signal(num, handler) | |
93 except ValueError: | |
94 pass # in a thread? | |
95 | |
96 # re-raise interrupt exception if any, which may be shadowed by a new | |
97 # interrupt occurred while re-raising the first one | |
98 if assertedsigs: | |
99 raiseinterrupt(assertedsigs[0]) | |
41 | 100 |
42 def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs): | 101 def trylock(ui, vfs, lockname, timeout, warntimeout, *args, **kwargs): |
43 """return an acquired lock or raise an a LockHeld exception | 102 """return an acquired lock or raise an a LockHeld exception |
44 | 103 |
45 This function is responsible to issue warnings and or debug messages about | 104 This function is responsible to issue warnings and or debug messages about |
180 lockname = '%s:%d' % (lock._host, self.pid) | 239 lockname = '%s:%d' % (lock._host, self.pid) |
181 retry = 5 | 240 retry = 5 |
182 while not self.held and retry: | 241 while not self.held and retry: |
183 retry -= 1 | 242 retry -= 1 |
184 try: | 243 try: |
185 self.vfs.makelock(lockname, self.f) | 244 with _delayedinterrupt(): |
186 self.held = 1 | 245 self.vfs.makelock(lockname, self.f) |
246 self.held = 1 | |
187 except (OSError, IOError) as why: | 247 except (OSError, IOError) as why: |
188 if why.errno == errno.EEXIST: | 248 if why.errno == errno.EEXIST: |
189 locker = self._readlock() | 249 locker = self._readlock() |
190 if locker is None: | 250 if locker is None: |
191 continue | 251 continue |