# HG changeset patch # User Pierre-Yves David # Date 1637237560 -3600 # Node ID 322525db4c98aff65e64d9f373976b1495d7e515 # Parent 03644a929d6efd7a0d7c6df6a73f714714698066 status: use filesystem time boundary to invalidate racy mtime We record the filesystem time at the start of the status walk and use that as a boundary to detect files that might be modified during (or right after) the status run without the mtime allowing that edition to be detected. We currently do this at a second precision. In a later patch, we will use nanosecond precision when available. To cope with "broken" time on the file system where file could be in the future, we also keep mtime for file over one day in the future. See inline comment for details. Large file tests get a bit more confused as we reduce the odds for race condition. As a "side effect", the win32text extension is happy again. Differential Revision: https://phab.mercurial-scm.org/D11794 diff -r 03644a929d6e -r 322525db4c98 hgext/largefiles/lfutil.py --- a/hgext/largefiles/lfutil.py Thu Nov 18 15:00:13 2021 +0100 +++ b/hgext/largefiles/lfutil.py Thu Nov 18 13:12:40 2021 +0100 @@ -244,7 +244,7 @@ def lfdirstatestatus(lfdirstate, repo): pctx = repo[b'.'] match = matchmod.always() - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( match, subrepos=[], ignored=False, clean=False, unknown=False ) modified, clean = s.modified, s.clean @@ -263,6 +263,10 @@ size = st.st_size mtime = timestamp.mtime_of(st) cache_data = (mode, size, mtime) + # We should consider using the mtime_boundary + # logic here, but largefile never actually had + # ambiguity protection before, so this confuse + # the tests and need more thinking. lfdirstate.set_clean(lfile, cache_data) return s @@ -670,7 +674,7 @@ # large. lfdirstate = openlfdirstate(ui, repo) dirtymatch = matchmod.always() - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False ) modifiedfiles = unsure + s.modified + s.added + s.removed diff -r 03644a929d6e -r 322525db4c98 hgext/largefiles/overrides.py --- a/hgext/largefiles/overrides.py Thu Nov 18 15:00:13 2021 +0100 +++ b/hgext/largefiles/overrides.py Thu Nov 18 13:12:40 2021 +0100 @@ -1519,7 +1519,7 @@ return orig(repo, matcher, prefix, uipathfn, opts) # Get the list of missing largefiles so we can remove them lfdirstate = lfutil.openlfdirstate(repo.ui, repo) - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( matchmod.always(), subrepos=[], ignored=False, @@ -1746,7 +1746,7 @@ # (*1) deprecated, but used internally (e.g: "rebase --collapse") lfdirstate = lfutil.openlfdirstate(repo.ui, repo) - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( matchmod.always(), subrepos=[], ignored=False, diff -r 03644a929d6e -r 322525db4c98 hgext/largefiles/reposetup.py --- a/hgext/largefiles/reposetup.py Thu Nov 18 15:00:13 2021 +0100 +++ b/hgext/largefiles/reposetup.py Thu Nov 18 13:12:40 2021 +0100 @@ -197,7 +197,7 @@ match._files = [f for f in match._files if sfindirstate(f)] # Don't waste time getting the ignored and unknown # files from lfdirstate - unsure, s = lfdirstate.status( + unsure, s, mtime_boundary = lfdirstate.status( match, subrepos=[], ignored=False, @@ -230,6 +230,10 @@ size = s.st_size mtime = timestamp.mtime_of(s) cache_data = (mode, size, mtime) + # We should consider using the mtime_boundary + # logic here, but largefile never actually had + # ambiguity protection before, so this confuse + # the tests and need more thinking. lfdirstate.set_clean(lfile, cache_data) else: tocheck = unsure + modified + added + clean diff -r 03644a929d6e -r 322525db4c98 hgext/remotefilelog/__init__.py --- a/hgext/remotefilelog/__init__.py Thu Nov 18 15:00:13 2021 +0100 +++ b/hgext/remotefilelog/__init__.py Thu Nov 18 13:12:40 2021 +0100 @@ -520,7 +520,7 @@ # Prefetch files before status attempts to look at their size and contents -def checklookup(orig, self, files): +def checklookup(orig, self, files, mtime_boundary): repo = self._repo if isenabled(repo): prefetchfiles = [] @@ -530,7 +530,7 @@ prefetchfiles.append((f, hex(parent.filenode(f)))) # batch fetch the needed files from the server repo.fileservice.prefetch(prefetchfiles) - return orig(self, files) + return orig(self, files, mtime_boundary) # Prefetch the logic that compares added and removed files for renames diff -r 03644a929d6e -r 322525db4c98 mercurial/context.py --- a/mercurial/context.py Thu Nov 18 15:00:13 2021 +0100 +++ b/mercurial/context.py Thu Nov 18 13:12:40 2021 +0100 @@ -1796,13 +1796,14 @@ sane.append(f) return sane - def _checklookup(self, files): + def _checklookup(self, files, mtime_boundary): # check for any possibly clean files if not files: - return [], [], [] + return [], [], [], [] modified = [] deleted = [] + clean = [] fixup = [] pctx = self._parents[0] # do a full compare of any files that might have changed @@ -1816,22 +1817,35 @@ or pctx[f].cmp(self[f]) ): modified.append(f) + elif mtime_boundary is None: + clean.append(f) else: - # XXX note that we have a race windows here since we gather - # the stats after we compared so the file might have - # changed. - # - # However this have always been the case and the - # refactoring moving the code here is improving the - # situation by narrowing the race and moving the two steps - # (comparison + stat) in the same location. - # - # Making this code "correct" is now possible. s = self[f].lstat() mode = s.st_mode size = s.st_size - mtime = timestamp.mtime_of(s) - fixup.append((f, (mode, size, mtime))) + file_mtime = timestamp.mtime_of(s) + cache_info = (mode, size, file_mtime) + + file_second = file_mtime[0] + boundary_second = mtime_boundary[0] + # If the mtime of the ambiguous file is younger (or equal) + # to the starting point of the `status` walk, we cannot + # garantee that another, racy, write will not happen right + # after with the same mtime and we cannot cache the + # information. + # + # However is the mtime is far away in the future, this is + # likely some mismatch between the current clock and + # previous file system operation. So mtime more than one days + # in the future are considered fine. + if ( + boundary_second + <= file_second + < (3600 * 24 + boundary_second) + ): + clean.append(f) + else: + fixup.append((f, cache_info)) except (IOError, OSError): # A file become inaccessible in between? Mark it as deleted, # matching dirstate behavior (issue5584). @@ -1841,7 +1855,7 @@ # it's in the dirstate. deleted.append(f) - return modified, deleted, fixup + return modified, deleted, clean, fixup def _poststatusfixup(self, status, fixup): """update dirstate for files that are actually clean""" @@ -1895,17 +1909,21 @@ subrepos = [] if b'.hgsub' in self: subrepos = sorted(self.substate) - cmp, s = self._repo.dirstate.status( + cmp, s, mtime_boundary = self._repo.dirstate.status( match, subrepos, ignored=ignored, clean=clean, unknown=unknown ) # check for any possibly clean files fixup = [] if cmp: - modified2, deleted2, fixup = self._checklookup(cmp) + modified2, deleted2, clean_set, fixup = self._checklookup( + cmp, mtime_boundary + ) s.modified.extend(modified2) s.deleted.extend(deleted2) + if clean_set and clean: + s.clean.extend(clean_set) if fixup and clean: s.clean.extend((f for f, _ in fixup)) diff -r 03644a929d6e -r 322525db4c98 mercurial/dirstate.py --- a/mercurial/dirstate.py Thu Nov 18 15:00:13 2021 +0100 +++ b/mercurial/dirstate.py Thu Nov 18 13:12:40 2021 +0100 @@ -1308,11 +1308,20 @@ # Some matchers have yet to be implemented use_rust = False + # Get the time from the filesystem so we can disambiguate files that + # appear modified in the present or future. + try: + mtime_boundary = timestamp.get_fs_now(self._opener) + except OSError: + # In largefiles or readonly context + mtime_boundary = None + if use_rust: try: - return self._rust_status( + res = self._rust_status( match, listclean, listignored, listunknown ) + return res + (mtime_boundary,) except rustmod.FallbackError: pass @@ -1402,7 +1411,7 @@ status = scmutil.status( modified, added, removed, deleted, unknown, ignored, clean ) - return (lookup, status) + return (lookup, status, mtime_boundary) def matches(self, match): """ diff -r 03644a929d6e -r 322525db4c98 mercurial/narrowspec.py --- a/mercurial/narrowspec.py Thu Nov 18 15:00:13 2021 +0100 +++ b/mercurial/narrowspec.py Thu Nov 18 13:12:40 2021 +0100 @@ -323,7 +323,7 @@ removedmatch = matchmod.differencematcher(oldmatch, newmatch) ds = repo.dirstate - lookup, status = ds.status( + lookup, status, _mtime_boundary = ds.status( removedmatch, subrepos=[], ignored=True, clean=True, unknown=True ) trackeddirty = status.modified + status.added diff -r 03644a929d6e -r 322525db4c98 tests/test-dirstate-race.t --- a/tests/test-dirstate-race.t Thu Nov 18 15:00:13 2021 +0100 +++ b/tests/test-dirstate-race.t Thu Nov 18 13:12:40 2021 +0100 @@ -66,11 +66,11 @@ > ) > def extsetup(ui): > extensions.wrapfunction(context.workingctx, '_checklookup', overridechecklookup) - > def overridechecklookup(orig, self, files): + > def overridechecklookup(orig, self, *args, **kwargs): > # make an update that changes the dirstate from underneath > self._repo.ui.system(br"sh '$TESTTMP/dirstaterace.sh'", > cwd=self._repo.root) - > return orig(self, files) + > return orig(self, *args, **kwargs) > EOF $ hg debugrebuilddirstate