# HG changeset patch # User Pierre-Yves David # Date 1720002168 -7200 # Node ID ba205f944cb4dacd2e19b8e4e7cbb66e681cdfa9 # Parent 5b803e5c13255f42fcd3cc913340f21367ae4809 mmap: add a `is_mmap_safe` method to vfs This will be useful to safeguard mmap usage to avoid SIGBUS when repositories live on an NFS drive. diff -r 5b803e5c1325 -r ba205f944cb4 mercurial/util.py --- a/mercurial/util.py Mon Jun 24 13:14:05 2024 +0200 +++ b/mercurial/util.py Wed Jul 03 12:22:48 2024 +0200 @@ -441,6 +441,13 @@ def mmapread(fp, size=None): + """Read a file content using mmap + + Checking that the file system is mmap safe is the + responsibility of the caller. + + In some cases, a normal string might be returned. + """ if size == 0: # size of 0 to mmap.mmap() means "all data" # rather than "zero bytes", so special case that. diff -r 5b803e5c1325 -r ba205f944cb4 mercurial/vfs.py --- a/mercurial/vfs.py Mon Jun 24 13:14:05 2024 +0200 +++ b/mercurial/vfs.py Wed Jul 03 12:22:48 2024 +0200 @@ -189,6 +189,35 @@ def lstat(self, path: Optional[bytes] = None): return os.lstat(self.join(path)) + def is_mmap_safe(self, path: Optional[bytes] = None) -> bool: + """return True if it is safe to read a file content as mmap + + This focuses on the file system aspect of such safety; the application + logic around that file is not taken into account, so callers need to + make sure the file won't be truncated in a way that will create SIGBUS + on access. + + + The initial motivation for this logic is that if mmap is used on NFS + and somebody deletes the mapped file (e.g. by renaming on top of it), + then you get SIGBUS, which can be pretty disruptive: we get core dump + reports, and the process terminates without writing to the blackbox. + + Instead in this situation we prefer to read the file normally. + The risk of ESTALE in the middle of the read remains, but it's + smaller because we read sooner and the error should be reported + just as any other error. 
+ + Note that the Python standard library does not offer the necessary function + to detect the file system bits. So this detection relies on compiled bits + and is not available in pure Python. + """ + # XXX Since we already assume a vfs to address a consistent file system + # in other locations, we could determine the fstype once for the root + # and cache that value. + fstype = util.getfstype(self.join(path)) + return fstype is not None and fstype != b'nfs' + def listdir(self, path: Optional[bytes] = None): return os.listdir(self.join(path))