# HG changeset patch # User Pierre-Yves David # Date 1677780172 -3600 # Node ID a6b8b1ab9116d4d87cfedbe0cea566f46ddec13d # Parent 2fbc109fd58a44475ad5fa5fea66a6ae74e93e4b# Parent 1ded5b48b8aa01eb5917edb35d2ff282cdf363ad branching: merge stable into default The clippy god had to be appeased on some aspect. diff -r 2fbc109fd58a -r a6b8b1ab9116 .hgsigs --- a/.hgsigs Thu Mar 02 04:16:47 2023 +0100 +++ b/.hgsigs Thu Mar 02 19:02:52 2023 +0100 @@ -238,3 +238,4 @@ 04f1dba53c961dfdb875c8469adc96fa999cfbed 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmNyC5sZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVqF+C/4uLaV/4nizZkWD3PjU1WyFYDg4bWDFOHb+PWuQ/3uoHXu1/EaYRnqmcDyOSJ99aXZBQ78rm9xhjxdmbklZ4ll1EGkqfTiYH+ld+rqE8iaqlc/DVy7pFXaenYwxletzO1OezzwF4XDLi6hcqzY9CXA3NM40vf6W4Rs5bEIi4eSbgJSNB1ll6ZzjvkU5bWTUoxSH+fxIJUuo27El2etdlKFQkS3/oTzWHejpVn6SQ1KyojTHMQBDRK4rqJBISp3gTf4TEezb0q0HTutJYDFdQNIRqx7V1Ao4Ei+YNbenJzcWJOA/2uk4V0AvZ4tnjgAzBYKwvIL1HfoQ0OmILeXjlVzV7Xu0G57lavum0sKkz/KZLKyYhKQHjYQLE7YMSM2y6/UEoFNN577vB47CHUq446PSMb8dGs2rmj66rj4iz5ml0yX+V9O2PpmIKoPAu1Y5/6zB9rCL76MRx182IW2m3rm4lsTfXPBPtea/OFt6ylxqCJRxaA0pht4FiAOvicPKXh4= c890d8b8bc59b18e5febf60caada629df5356ee2 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmN48sEZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVqwwC/9GkaE5adkLaJBZeRqfLL710ZPMAttiPhLAYl9YcUeUjw2rTU1bxxUks0oSfW4J0AaJLscl+pG4zZW8FN2MXY3njdcpAA/bv4nb+rq50Mdm0mD3iLOyKbIDQbUoYe7YpIPbpyuf8G/y4R1IXiLJjK329vzIsHkqyKPwUzxvyfZkjg6Lx00RRcfWrosb2Jb0+EhP9Yi7tjJmNWjsaTb8Ufp+ImYAL3qcDErkqb6wJCGAM0AwVfAJ7MZz3v3E56n1HTPhNqf8UvfR4URsuDlk56mP4do/QThC7dANiKeWrFJSBPu8uSpaHzUk1XCat0RHK03DMr15Ln1YCEhTmaedHr2rtp0fgGqaMH1jLZt0+9fiPaaYjck7Y+aagdc3bt1VhqtClbCJz5KWynpCLrn8MX40QmXuwly+KHzMuPQ6i0ui95ifgtrW7/Zd7uI7mYZ2zUeFUZPnL9XmGpFI595N8TjoPuFeO/ea4OQbLUY+lmmgZQrWoTpc5LDUyFXSFzJS2bU= 59466b13a3ae0e29a5d4f485393e516cfbb057d0 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmO1XgoZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVn8nDACU04KbPloLl+if6DQYreESnF9LU8C+qnLC/j5RRuaFNh/ec6C3DzLWqWdmnWA/siV3nUR1bXHfTui95azxJfYvWoXH2R2yam+YhE256B4rDDYWS1LI9kNNM+A33xcPS2HxVowkByhjB5FPKR6I90dX42BYJpTS5s/VPx63wXLznjFWuD7XJ3P0VI7D72j/+6EQCmHaAUEE5bO00Ob2JxmzJlaP+02fYc814PAONE2/ocfR0aExAVS3VA+SJGXnXTVpoaHr7NJKC2sBLFsdnhIRwtCf3rtGEvIJ5v2U2xx0ZEz/mimtGzW5ovkthobV4mojk0DRz7xBtA96pOGSRTD8QndIsdMCUipo8zZ/AGAMByCtsQOX7OYhR6gp+I6+iPh8fTR5oCbkO7cizDDQtXcrR5OT/BDH9xkAF1ghNL8o23a09/wfZ9NPg5zrh/4T/dFfoe2COlkAJJ1ttDPYyQkCfMsoWm3OXk6xJ3ExVbwkZzUDQSzsxGS+oxbFDWJZ64Q= +8830004967ad865ead89c28a410405a6e71e0796 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmQAsOQZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVl7XC/0W+Wd4gzMUbaot+NVIZTpubNw3KHBDXrlMgwQgCDg7qcqJnVuT1NNEy5sRELjZOO0867k+pBchZaxdmAiFwY1W76+7nwiLBqfCkYgYY0iQe48JHTq9kCgohvx9PSEVbUsScmqAQImd5KzErjhsLj8D2FiFIrcMyqsCBq4ZPs0Ey7lVKu6q3z5eDjlrxUIr0up6yKvgBxhY0GxyTp6DGoinzlFMEadiJlsvlwO4C6UpzKiCGMeKNT5xHK/Hx3ChrOH2Yuu1fHaPLJ+ZpXjR33ileVYlkQrh1D6fWHXcP7ZuwsEKREtgsw1YjYczGFwmhBO362bNi5wy33mBtCvcIAqpsI0rMrExs66qqbfyG+Yp1dvkgzUfdhbYFHA+mvg3/YTSD9dLKzzsb69LM87+dvcLqhBJ0nEAuBmAzU5ECkoArbiwMT96NhhjLPRmJJdHNo0IDos/LBGTgkOZ6iqIx8Xm/tgjBjFJG8B+IVy3laNgun4AZ9Ejc3ahIfhJUIo2j8o= diff -r 2fbc109fd58a -r a6b8b1ab9116 .hgtags --- a/.hgtags Thu Mar 02 04:16:47 2023 +0100 +++ b/.hgtags Thu Mar 02 19:02:52 2023 +0100 @@ -254,3 +254,4 @@ 0000000000000000000000000000000000000000 6.3.0 c890d8b8bc59b18e5febf60caada629df5356ee2 6.3.1 59466b13a3ae0e29a5d4f485393e516cfbb057d0 6.3.2 +8830004967ad865ead89c28a410405a6e71e0796 6.3.3 diff -r 2fbc109fd58a -r a6b8b1ab9116 hgext/patchbomb.py --- a/hgext/patchbomb.py Thu Mar 02 04:16:47 2023 +0100 +++ b/hgext/patchbomb.py Thu Mar 02 19:02:52 2023 +0100 @@ -361,7 +361,12 @@ ui.warn(_(b'warning: working directory has uncommitted changes\n')) output = stringio() cmdutil.exportfile( - repo, [r], output, opts=patch.difffeatureopts(ui, opts, git=True) + repo, + [r], + output, + opts=patch.difffeatureopts( + ui, pycompat.byteskwargs(opts), git=True + ), ) yield output.getvalue().split(b'\n') diff -r 2fbc109fd58a -r a6b8b1ab9116 mercurial/configitems.py --- a/mercurial/configitems.py Thu Mar 02 04:16:47 2023 +0100 +++ b/mercurial/configitems.py Thu Mar 02 19:02:52 2023 +0100 @@ -652,6 +652,15 @@ b'deprec-warn', default=False, ) +# possible values: +# - auto (the default) +# - force-append +# - force-new +coreconfigitem( + b'devel', + b'dirstate.v2.data_update_mode', + default="auto", +) coreconfigitem( b'devel', b'disableloaddefaultcerts', @@ -689,6 +698,42 @@ b'serverrequirecert', default=False, ) +# Makes the status algorithm wait for the existence of this file +# (or until a timeout of `devel.sync.status.pre-dirstate-write-file-timeout` +# seconds) before taking the lock and writing the dirstate. +# Status signals that it's ready to wait by creating a file +# with the same name + `.waiting`. +# Useful when testing race conditions. +coreconfigitem( + b'devel', + b'sync.status.pre-dirstate-write-file', + default=None, +) +coreconfigitem( + b'devel', + b'sync.status.pre-dirstate-write-file-timeout', + default=2, +) +coreconfigitem( + b'devel', + b'sync.dirstate.post-docket-read-file', + default=None, +) +coreconfigitem( + b'devel', + b'sync.dirstate.post-docket-read-file-timeout', + default=2, +) +coreconfigitem( + b'devel', + b'sync.dirstate.pre-read-file', + default=None, +) +coreconfigitem( + b'devel', + b'sync.dirstate.pre-read-file-timeout', + default=2, +) coreconfigitem( b'devel', b'strip-obsmarkers', diff -r 2fbc109fd58a -r a6b8b1ab9116 mercurial/context.py --- a/mercurial/context.py Thu Mar 02 04:16:47 2023 +0100 +++ b/mercurial/context.py Thu Mar 02 19:02:52 2023 +0100 @@ -36,6 +36,7 @@ sparse, subrepo, subrepoutil, + testing, util, ) from .utils import ( @@ -1854,6 +1855,7 @@ def _poststatusfixup(self, status, fixup): """update dirstate for files that are actually clean""" + testing.wait_on_cfg(self._repo.ui, b'status.pre-dirstate-write-file') dirstate = self._repo.dirstate poststatus = self._repo.postdsstatus() if fixup: diff -r 2fbc109fd58a -r a6b8b1ab9116 mercurial/diffutil.py --- a/mercurial/diffutil.py Thu Mar 02 04:16:47 2023 +0100 +++ b/mercurial/diffutil.py Thu Mar 02 19:02:52 2023 +0100 @@ -7,6 +7,13 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. +import typing + +from typing import ( + Any, + Dict, + Optional, +) from .i18n import _ @@ -15,10 +22,20 @@ pycompat, ) +if typing.TYPE_CHECKING: + from . import ui as uimod + +# TODO: narrow the value after the config module is typed +_Opts = Dict[bytes, Any] + def diffallopts( - ui, opts=None, untrusted=False, section=b'diff', configprefix=b'' -): + ui: "uimod.ui", + opts: Optional[_Opts] = None, + untrusted: bool = False, + section: bytes = b'diff', + configprefix: bytes = b'', +) -> mdiff.diffopts: '''return diffopts with all features supported and parsed''' return difffeatureopts( ui, @@ -33,15 +50,15 @@ def difffeatureopts( - ui, - opts=None, - untrusted=False, - section=b'diff', - git=False, - whitespace=False, - formatchanging=False, - configprefix=b'', -): + ui: "uimod.ui", + opts: Optional[_Opts] = None, + untrusted: bool = False, + section: bytes = b'diff', + git: bool = False, + whitespace: bool = False, + formatchanging: bool = False, + configprefix: bytes = b'', +) -> mdiff.diffopts: """return diffopts with only opted-in features parsed Features: @@ -51,7 +68,12 @@ with most diff parsers """ - def get(key, name=None, getter=ui.configbool, forceplain=None): + def get( + key: bytes, + name: Optional[bytes] = None, + getter=ui.configbool, + forceplain: Optional[bool] = None, + ) -> Any: if opts: v = opts.get(key) # diffopts flags are either None-default (which is passed diff -r 2fbc109fd58a -r a6b8b1ab9116 mercurial/dirstatemap.py --- a/mercurial/dirstatemap.py Thu Mar 02 04:16:47 2023 +0100 +++ b/mercurial/dirstatemap.py Thu Mar 02 19:02:52 2023 +0100 @@ -10,6 +10,7 @@ error, pathutil, policy, + testing, txnutil, util, ) @@ -31,6 +32,13 @@ rangemask = 0x7FFFFFFF +WRITE_MODE_AUTO = 0 +WRITE_MODE_FORCE_NEW = 1 +WRITE_MODE_FORCE_APPEND = 2 + + +V2_MAX_READ_ATTEMPTS = 5 + class _dirstatemapcommon: """ @@ -54,6 +62,16 @@ self._parents = None self._dirtyparents = False self._docket = None + write_mode = ui.config(b"devel", b"dirstate.v2.data_update_mode") + if write_mode == b"auto": + self._write_mode = WRITE_MODE_AUTO + elif write_mode == b"force-append": + self._write_mode = WRITE_MODE_FORCE_APPEND + elif write_mode == b"force-new": + self._write_mode = WRITE_MODE_FORCE_NEW + else: + # unknown value, fallback to default + self._write_mode = WRITE_MODE_AUTO # for consistent view between _pl() and _read() invocations self._pendingmode = None @@ -132,11 +150,31 @@ raise error.ProgrammingError( b'dirstate only has a docket in v2 format' ) + self._set_identity() self._docket = docketmod.DirstateDocket.parse( self._readdirstatefile(), self._nodeconstants ) return self._docket + def _read_v2_data(self): + data = None + attempts = 0 + while attempts < V2_MAX_READ_ATTEMPTS: + attempts += 1 + try: + # TODO: use mmap when possible + data = self._opener.read(self.docket.data_filename()) + except FileNotFoundError: + # read race detected between docket and data file + # reload the docket and retry + self._docket = None + if data is None: + assert attempts >= V2_MAX_READ_ATTEMPTS + msg = b"dirstate read race happened %d times in a row" + msg %= attempts + raise error.Abort(msg) + return self._opener.read(self.docket.data_filename()) + def write_v2_no_append(self, tr, st, meta, packed): old_docket = self.docket new_docket = docketmod.DirstateDocket.with_new_uuid( @@ -290,14 +328,15 @@ ### disk interaction def read(self): - # ignore HG_PENDING because identity is used only for writing - self._set_identity() + testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file') + if self._use_dirstate_v2: - if self._use_dirstate_v2: if not self.docket.uuid: return - st = self._opener.read(self.docket.data_filename()) + testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') + st = self._read_v2_data() else: + self._set_identity() st = self._readdirstatefile() if not st: @@ -556,19 +595,39 @@ # ignore HG_PENDING because identity is used only for writing self._set_identity() + testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file') if self._use_dirstate_v2: - if self.docket.uuid: - # TODO: use mmap when possible - data = self._opener.read(self.docket.data_filename()) + self.docket # load the data if needed + inode = ( + self.identity.stat.st_ino + if self.identity is not None + and self.identity.stat is not None + else None + ) + testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') + if not self.docket.uuid: + data = b'' + self._map = rustmod.DirstateMap.new_empty() else: - data = b'' - self._map = rustmod.DirstateMap.new_v2( - data, self.docket.data_size, self.docket.tree_metadata - ) + data = self._read_v2_data() + self._map = rustmod.DirstateMap.new_v2( + data, + self.docket.data_size, + self.docket.tree_metadata, + self.docket.uuid, + inode, + ) parents = self.docket.parents else: + self._set_identity() + inode = ( + self.identity.stat.st_ino + if self.identity is not None + and self.identity.stat is not None + else None + ) self._map, parents = rustmod.DirstateMap.new_v1( - self._readdirstatefile() + self._readdirstatefile(), inode ) if parents and not self._dirtyparents: @@ -638,8 +697,10 @@ return # We can only append to an existing data file if there is one - can_append = self.docket.uuid is not None - packed, meta, append = self._map.write_v2(can_append) + write_mode = self._write_mode + if self.docket.uuid is None: + write_mode = WRITE_MODE_FORCE_NEW + packed, meta, append = self._map.write_v2(write_mode) if append: docket = self.docket data_filename = docket.data_filename() diff -r 2fbc109fd58a -r a6b8b1ab9116 mercurial/testing/__init__.py --- a/mercurial/testing/__init__.py Thu Mar 02 04:16:47 2023 +0100 +++ b/mercurial/testing/__init__.py Thu Mar 02 19:02:52 2023 +0100 @@ -9,6 +9,21 @@ environ = getattr(os, 'environ') +def wait_on_cfg(ui, cfg, timeout=10): + """synchronize on the `cfg` config path + + Use this to synchronize commands during race tests. + """ + full_config = b'sync.' + cfg + wait_config = full_config + b'-timeout' + sync_path = ui.config(b'devel', full_config) + if sync_path is not None: + timeout = ui.config(b'devel', wait_config) + ready_path = sync_path + b'.waiting' + write_file(ready_path) + wait_file(sync_path, timeout=timeout) + + def _timeout_factor(): """return the current modification to timeout""" default = int(environ.get('HGTEST_TIMEOUT_DEFAULT', 360)) diff -r 2fbc109fd58a -r a6b8b1ab9116 relnotes/6.3 --- a/relnotes/6.3 Thu Mar 02 04:16:47 2023 +0100 +++ b/relnotes/6.3 Thu Mar 02 19:02:52 2023 +0100 @@ -1,3 +1,51 @@ += Mercurial 6.3.3 = + + * tests: filter out PEP 657 error locations in tracebacks (issue6780) + * tests: optional PEP 657 error location in test-extension.t (issue6781) + * tests: optional PEP 657 error location in test-lfs-serve-access.t (issue6782) + * histedit: byteify the help for the multifold action + * sparse: fix a py2 based usage of `map()` + * convert: stop passing str to the dateutil API in darcs + * convert: turn the last str regex into bytes in cvs.py (issue6789) + * convert: change socket mode from b'r+' to 'rwb' in cvs.py (issue6789) + * convert: replace repr() by pycompat.byterepr() in cvsps.py (issue6789) + * tests: os module is frozen in Python 3.11 (issue6786) + * hgweb: unbyteify the 100-continue check + * resourceutil: start using importlib.resources.files() when possible + * revset: the `random` sort should not depend on sys.maxsize (issue6770) + * tests: make sure pygments can detect python script without extension + * convert: brz 3.3.0 moved NoSuchFile exception to breezy.transport + * tests: pygments 2.14+ highlight whitespace in python code + * hghave: make different has_pyoxidizer functions have different names + * hghave: refactor checks for pygments versions using checkvers() + * rust-narrow: fix loop that never loops + * scmutil: make checknewlabel() allow "_" in otherwise numeric names (issue6737) + * bundlerepo: enforce the requirements declared by the underlying repository + * setup: make the version computation process more resistant + * fix: add more information to the debug output + * tag: disallow tagging the working directory + * dirstate: handle missing backup file on restoration + * dirstate-v2: complain early on docket name collision + * rust: upgrade minimum `rayon` dependency + * setup: support building from an ongoing merge + * rust: add debug log about skipping dirstate update + * rust-dirstate: trace append/no append to help debugging + * dirstate-v2: don't mmap the data file when on NFS + * run-tests: make it possible to nest conditionals + * dirstate: add some debug output when writing the dirstate + * testing: introduce util function to synchronize concurrent commands on files + * dirstate: add a way to test races happening during status + * dirstate: use more than a bool to control append behavior + * dirstate-v2: add devel config option to control write behavior + * rhg: fix race when a fixup file is deleted on disk + * rhg: fix race when an ambiguous file is deleted on disk + * dirstate: deal with read-race for pure python code + * dirstate: deal with read-race for python code using rust object + * dirstate: deal with read-race for pure rust code path (rhg) + * dirstate: set identity whenever we read the dirstate's v2 docket + * rust-dirstate-v2: don't write dirstate if data file has changed + * rhg: remember the inode of .hg/dirstate + = Mercurial 6.3.2 = * [ecfc84b956a8] tests: expect the message from 1baf0fffd82f in test-hghave.t (issue6762) diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Mar 02 19:02:52 2023 +0100 @@ -38,6 +38,13 @@ V2, } +#[derive(Debug, PartialEq, Eq)] +pub enum DirstateMapWriteMode { + Auto, + ForceNewDataFile, + ForceAppend, +} + #[derive(Debug)] pub struct DirstateMap<'on_disk> { /// Contents of the `.hg/dirstate` file @@ -59,10 +66,28 @@ pub(super) unreachable_bytes: u32, /// Size of the data used to first load this `DirstateMap`. Used in case - /// we need to write some new metadata, but no new data on disk. + /// we need to write some new metadata, but no new data on disk, + /// as well as to detect writes that have happened in another process + /// since first read. pub(super) old_data_size: usize, + /// UUID used when first loading this `DirstateMap`. Used to check if + /// the UUID has been changed by another process since first read. + /// Can be `None` if using dirstate v1 or if it's a brand new dirstate. + pub(super) old_uuid: Option>, + + /// Identity of the dirstate file (for dirstate-v1) or the docket file + /// (v2). Used to detect if the file has changed from another process. + /// Since it's always written atomically, we can compare the inode to + /// check the file identity. + /// + /// TODO On non-Unix systems, something like hashing is a possibility? + pub(super) identity: Option, + pub(super) dirstate_version: DirstateVersion, + + /// Controlled by config option `devel.dirstate.v2.data_update_mode` + pub(super) write_mode: DirstateMapWriteMode, } /// Using a plain `HgPathBuf` of the full path from the repository root as a @@ -445,7 +470,10 @@ ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], unreachable_bytes: 0, old_data_size: 0, + old_uuid: None, + identity: None, dirstate_version: DirstateVersion::V1, + write_mode: DirstateMapWriteMode::Auto, } } @@ -454,9 +482,11 @@ on_disk: &'on_disk [u8], data_size: usize, metadata: &[u8], + uuid: Vec, + identity: Option, ) -> Result { if let Some(data) = on_disk.get(..data_size) { - Ok(on_disk::read(data, metadata)?) + Ok(on_disk::read(data, metadata, uuid, identity)?) } else { Err(DirstateV2ParseError::new("not enough bytes on disk").into()) } @@ -465,6 +495,7 @@ #[logging_timer::time("trace")] pub fn new_v1( on_disk: &'on_disk [u8], + identity: Option, ) -> Result<(Self, Option), DirstateError> { let mut map = Self::empty(on_disk); if map.on_disk.is_empty() { @@ -506,6 +537,7 @@ }, )?; let parents = Some(*parents); + map.identity = identity; Ok((map, parents)) } @@ -514,8 +546,15 @@ /// append to the existing data file that contains `self.on_disk` (true), /// or create a new data file from scratch (false). pub(super) fn write_should_append(&self) -> bool { - let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32; - ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO + match self.write_mode { + DirstateMapWriteMode::ForceAppend => true, + DirstateMapWriteMode::ForceNewDataFile => false, + DirstateMapWriteMode::Auto => { + let ratio = + self.unreachable_bytes as f32 / self.on_disk.len() as f32; + ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO + } + } } fn get_node<'tree>( @@ -911,6 +950,10 @@ *unreachable_bytes += path.len() as u32 } } + + pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) { + self.write_mode = write_mode; + } } type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32)); @@ -1230,11 +1273,11 @@ #[logging_timer::time("trace")] pub fn pack_v2( &self, - can_append: bool, + write_mode: DirstateMapWriteMode, ) -> Result<(Vec, on_disk::TreeMetadata, bool, usize), DirstateError> { let map = self.get_map(); - on_disk::write(map, can_append) + on_disk::write(map, write_mode) } /// `callback` allows the caller to process and do something with the @@ -1794,7 +1837,7 @@ )?; let (packed, metadata, _should_append, _old_data_size) = - map.pack_v2(false)?; + map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?; let packed_len = packed.len(); assert!(packed_len > 0); @@ -1803,6 +1846,8 @@ packed, packed_len, metadata.as_bytes(), + vec![], + None, )?; // Check that everything is accounted for diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Mar 02 19:02:52 2023 +0100 @@ -4,7 +4,9 @@ use crate::dirstate::{DirstateV2Data, TruncatedTimestamp}; use crate::dirstate_tree::dirstate_map::DirstateVersion; -use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; +use crate::dirstate_tree::dirstate_map::{ + self, DirstateMap, DirstateMapWriteMode, NodeRef, +}; use crate::dirstate_tree::path_with_basename::WithBasename; use crate::errors::HgError; use crate::utils::hg_path::HgPath; @@ -285,6 +287,8 @@ pub(super) fn read<'on_disk>( on_disk: &'on_disk [u8], metadata: &[u8], + uuid: Vec, + identity: Option, ) -> Result, DirstateV2ParseError> { if on_disk.is_empty() { let mut map = DirstateMap::empty(on_disk); @@ -307,7 +311,10 @@ ignore_patterns_hash: meta.ignore_patterns_hash, unreachable_bytes: meta.unreachable_bytes.get(), old_data_size: on_disk.len(), + old_uuid: Some(uuid), + identity, dirstate_version: DirstateVersion::V2, + write_mode: DirstateMapWriteMode::Auto, }; Ok(dirstate_map) } @@ -605,9 +612,18 @@ /// (false), and the previous size of data on disk. pub(super) fn write( dirstate_map: &DirstateMap, - can_append: bool, + write_mode: DirstateMapWriteMode, ) -> Result<(Vec, TreeMetadata, bool, usize), DirstateError> { - let append = can_append && dirstate_map.write_should_append(); + let append = match write_mode { + DirstateMapWriteMode::Auto => dirstate_map.write_should_append(), + DirstateMapWriteMode::ForceNewDataFile => false, + DirstateMapWriteMode::ForceAppend => true, + }; + if append { + log::trace!("appending to the dirstate data file"); + } else { + log::trace!("creating new dirstate data file"); + } // This ignores the space for paths, and for nodes without an entry. // TODO: better estimate? Skip the `Vec` and write to a file directly? diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/dirstate_tree/owning.rs --- a/rust/hg-core/src/dirstate_tree/owning.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/dirstate_tree/owning.rs Thu Mar 02 19:02:52 2023 +0100 @@ -31,6 +31,7 @@ pub fn new_v1( on_disk: OnDisk, + identity: Option, ) -> Result<(Self, DirstateParents), DirstateError> where OnDisk: Deref + Send + 'static, @@ -42,7 +43,7 @@ OwningDirstateMapTryBuilder { on_disk, map_builder: |bytes| { - DirstateMap::new_v1(bytes).map(|(dmap, p)| { + DirstateMap::new_v1(bytes, identity).map(|(dmap, p)| { parents = p.unwrap_or(DirstateParents::NULL); dmap }) @@ -57,6 +58,8 @@ on_disk: OnDisk, data_size: usize, metadata: &[u8], + uuid: Vec, + identity: Option, ) -> Result where OnDisk: Deref + Send + 'static, @@ -66,7 +69,7 @@ OwningDirstateMapTryBuilder { on_disk, map_builder: |bytes| { - DirstateMap::new_v2(bytes, data_size, metadata) + DirstateMap::new_v2(bytes, data_size, metadata, uuid, identity) }, } .try_build() @@ -86,4 +89,16 @@ pub fn on_disk(&self) -> &[u8] { self.borrow_on_disk() } + + pub fn old_uuid(&self) -> Option<&[u8]> { + self.get_map().old_uuid.as_deref() + } + + pub fn old_identity(&self) -> Option { + self.get_map().identity + } + + pub fn old_data_size(&self) -> usize { + self.get_map().old_data_size + } } diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/errors.rs --- a/rust/hg-core/src/errors.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/errors.rs Thu Mar 02 19:02:52 2023 +0100 @@ -46,6 +46,9 @@ /// Censored revision data. CensoredNodeError, + /// A race condition has been detected. This *must* be handled locally + /// and not directly surface to the user. + RaceDetected(String), } /// Details about where an I/O error happened @@ -111,6 +114,9 @@ write!(f, "encountered a censored node") } HgError::ConfigValueParseError(error) => error.fmt(f), + HgError::RaceDetected(context) => { + write!(f, "encountered a race condition {context}") + } } } } diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/repo.rs Thu Mar 02 19:02:52 2023 +0100 @@ -1,6 +1,7 @@ use crate::changelog::Changelog; use crate::config::{Config, ConfigError, ConfigParseError}; use crate::dirstate::DirstateParents; +use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode; use crate::dirstate_tree::on_disk::Docket as DirstateDocket; use crate::dirstate_tree::owning::OwningDirstateMap; use crate::errors::HgResultExt; @@ -9,6 +10,7 @@ use crate::manifest::{Manifest, Manifestlog}; use crate::revlog::filelog::Filelog; use crate::revlog::RevlogError; +use crate::utils::debug::debug_wait_for_file_or_print; use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; @@ -22,6 +24,10 @@ use std::io::Write as IoWrite; use std::path::{Path, PathBuf}; +const V2_MAX_READ_ATTEMPTS: usize = 5; + +type DirstateMapIdentity = (Option, Option>, usize); + /// A repository on disk pub struct Repo { working_directory: PathBuf, @@ -30,7 +36,6 @@ requirements: HashSet, config: Config, dirstate_parents: LazyCell, - dirstate_data_file_uuid: LazyCell>>, dirstate_map: LazyCell, changelog: LazyCell, manifestlog: LazyCell, @@ -180,7 +185,6 @@ dot_hg, config: repo_config, dirstate_parents: LazyCell::new(), - dirstate_data_file_uuid: LazyCell::new(), dirstate_map: LazyCell::new(), changelog: LazyCell::new(), manifestlog: LazyCell::new(), @@ -254,6 +258,15 @@ .unwrap_or_default()) } + fn dirstate_identity(&self) -> Result, HgError> { + use std::os::unix::fs::MetadataExt; + Ok(self + .hg_vfs() + .symlink_metadata("dirstate") + .io_not_found_as_none()? + .map(|meta| meta.ino())) + } + pub fn dirstate_parents(&self) -> Result { Ok(*self .dirstate_parents @@ -263,15 +276,10 @@ fn read_dirstate_parents(&self) -> Result { let dirstate = self.dirstate_file_contents()?; let parents = if dirstate.is_empty() { - if self.has_dirstate_v2() { - self.dirstate_data_file_uuid.set(None); - } DirstateParents::NULL } else if self.has_dirstate_v2() { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; - self.dirstate_data_file_uuid - .set(Some(docket.uuid.to_owned())); docket.parents() } else { *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? @@ -280,57 +288,179 @@ Ok(parents) } - fn read_dirstate_data_file_uuid( + /// Returns the information read from the dirstate docket necessary to + /// check if the data file has been updated/deleted by another process + /// since we last read the dirstate. + /// Namely, the inode, data file uuid and the data size. + fn get_dirstate_data_file_integrity( &self, - ) -> Result>, HgError> { + ) -> Result { assert!( self.has_dirstate_v2(), "accessing dirstate data file ID without dirstate-v2" ); + // Get the identity before the contents since we could have a race + // between the two. Having an identity that is too old is fine, but + // one that is younger than the content change is bad. + let identity = self.dirstate_identity()?; let dirstate = self.dirstate_file_contents()?; if dirstate.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - Ok(None) + Ok((identity, None, 0)) } else { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; self.dirstate_parents.set(docket.parents()); - Ok(Some(docket.uuid.to_owned())) + Ok((identity, Some(docket.uuid.to_owned()), docket.data_size())) } } fn new_dirstate_map(&self) -> Result { + if self.has_dirstate_v2() { + // The v2 dirstate is split into a docket and a data file. + // Since we don't always take the `wlock` to read it + // (like in `hg status`), it is susceptible to races. + // A simple retry method should be enough since full rewrites + // only happen when too much garbage data is present and + // this race is unlikely. + let mut tries = 0; + + while tries < V2_MAX_READ_ATTEMPTS { + tries += 1; + match self.read_docket_and_data_file() { + Ok(m) => { + return Ok(m); + } + Err(e) => match e { + DirstateError::Common(HgError::RaceDetected( + context, + )) => { + log::info!( + "dirstate read race detected {} (retry {}/{})", + context, + tries, + V2_MAX_READ_ATTEMPTS, + ); + continue; + } + _ => return Err(e), + }, + } + } + let error = HgError::abort( + format!("dirstate read race happened {tries} times in a row"), + 255, + None, + ); + Err(DirstateError::Common(error)) + } else { + debug_wait_for_file_or_print( + self.config(), + "dirstate.pre-read-file", + ); + let identity = self.dirstate_identity()?; + let dirstate_file_contents = self.dirstate_file_contents()?; + if dirstate_file_contents.is_empty() { + self.dirstate_parents.set(DirstateParents::NULL); + Ok(OwningDirstateMap::new_empty(Vec::new())) + } else { + let (map, parents) = OwningDirstateMap::new_v1( + dirstate_file_contents, + identity, + )?; + self.dirstate_parents.set(parents); + Ok(map) + } + } + } + + fn read_docket_and_data_file( + &self, + ) -> Result { + debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); let dirstate_file_contents = self.dirstate_file_contents()?; + let identity = self.dirstate_identity()?; if dirstate_file_contents.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - if self.has_dirstate_v2() { - self.dirstate_data_file_uuid.set(None); - } - Ok(OwningDirstateMap::new_empty(Vec::new())) - } else if self.has_dirstate_v2() { - let docket = crate::dirstate_tree::on_disk::read_docket( - &dirstate_file_contents, - )?; - self.dirstate_parents.set(docket.parents()); - self.dirstate_data_file_uuid - .set(Some(docket.uuid.to_owned())); - let data_size = docket.data_size(); - let metadata = docket.tree_metadata(); - if let Some(data_mmap) = self + return Ok(OwningDirstateMap::new_empty(Vec::new())); + } + let docket = crate::dirstate_tree::on_disk::read_docket( + &dirstate_file_contents, + )?; + debug_wait_for_file_or_print( + self.config(), + "dirstate.post-docket-read-file", + ); + self.dirstate_parents.set(docket.parents()); + let uuid = docket.uuid.to_owned(); + let data_size = docket.data_size(); + + let context = "between reading dirstate docket and data file"; + let race_error = HgError::RaceDetected(context.into()); + let metadata = docket.tree_metadata(); + + let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) { + // Don't mmap on NFS to prevent `SIGBUS` error on deletion + let contents = self.hg_vfs().read(docket.data_filename()); + let contents = match contents { + Ok(c) => c, + Err(HgError::IoError { error, context }) => { + match error.raw_os_error().expect("real os error") { + // 2 = ENOENT, No such file or directory + // 116 = ESTALE, Stale NFS file handle + // + // TODO match on `error.kind()` when + // `ErrorKind::StaleNetworkFileHandle` is stable. + 2 | 116 => { + // Race where the data file was deleted right after + // we read the docket, try again + return Err(race_error.into()); + } + _ => { + return Err( + HgError::IoError { error, context }.into() + ) + } + } + } + Err(e) => return Err(e.into()), + }; + OwningDirstateMap::new_v2( + contents, data_size, metadata, uuid, identity, + ) + } else { + match self .hg_vfs() .mmap_open(docket.data_filename()) - .io_not_found_as_none()? + .io_not_found_as_none() { - OwningDirstateMap::new_v2(data_mmap, data_size, metadata) - } else { - OwningDirstateMap::new_v2(Vec::new(), data_size, metadata) + Ok(Some(data_mmap)) => OwningDirstateMap::new_v2( + data_mmap, data_size, metadata, uuid, identity, + ), + Ok(None) => { + // Race where the data file was deleted right after we + // read the docket, try again + return Err(race_error.into()); + } + Err(e) => return Err(e.into()), } - } else { - let (map, parents) = - OwningDirstateMap::new_v1(dirstate_file_contents)?; - self.dirstate_parents.set(parents); - Ok(map) - } + }?; + + let write_mode_config = self + .config() + .get_str(b"devel", b"dirstate.v2.data_update_mode") + .unwrap_or(Some("auto")) + .unwrap_or("auto"); // don't bother for devel options + let write_mode = match write_mode_config { + "auto" => DirstateMapWriteMode::Auto, + "force-new" => DirstateMapWriteMode::ForceNewDataFile, + "force-append" => DirstateMapWriteMode::ForceAppend, + _ => DirstateMapWriteMode::Auto, + }; + + map.with_dmap_mut(|m| m.set_write_mode(write_mode)); + + Ok(map) } pub fn dirstate_map( @@ -421,13 +551,37 @@ // it’s unset let parents = self.dirstate_parents()?; let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() { - let uuid_opt = self - .dirstate_data_file_uuid - .get_or_init(|| self.read_dirstate_data_file_uuid())?; - let uuid_opt = uuid_opt.as_ref(); - let can_append = uuid_opt.is_some(); + let (identity, uuid, data_size) = + self.get_dirstate_data_file_integrity()?; + let identity_changed = identity != map.old_identity(); + let uuid_changed = uuid.as_deref() != map.old_uuid(); + let data_length_changed = data_size != map.old_data_size(); + + if identity_changed || uuid_changed || data_length_changed { + // If any of identity, uuid or length have changed since + // last disk read, don't write. + // This is fine because either we're in a command that doesn't + // write anything too important (like `hg status`), or we're in + // `hg add` and we're supposed to have taken the lock before + // reading anyway. + // + // TODO complain loudly if we've changed anything important + // without taking the lock. + // (see `hg help config.format.use-dirstate-tracked-hint`) + log::debug!( + "dirstate has changed since last read, not updating." + ); + return Ok(()); + } + + let uuid_opt = map.old_uuid(); + let write_mode = if uuid_opt.is_some() { + DirstateMapWriteMode::Auto + } else { + DirstateMapWriteMode::ForceNewDataFile + }; let (data, tree_metadata, append, old_data_size) = - map.pack_v2(can_append)?; + map.pack_v2(write_mode)?; // Reuse the uuid, or generate a new one, keeping the old for // deletion. @@ -470,7 +624,10 @@ // https://github.com/openzfs/zfs/issues/13370 // if !append { + log::trace!("creating a new dirstate data file"); options.create_new(true); + } else { + log::trace!("appending to the dirstate data file"); } let data_size = (|| { @@ -499,6 +656,22 @@ (packed_dirstate, old_uuid) } else { + let identity = self.dirstate_identity()?; + if identity != map.old_identity() { + // If identity changed since last disk read, don't write. + // This is fine because either we're in a command that doesn't + // write anything too important (like `hg status`), or we're in + // `hg add` and we're supposed to have taken the lock before + // reading anyway. + // + // TODO complain loudly if we've changed anything important + // without taking the lock. + // (see `hg help config.format.use-dirstate-tracked-hint`) + log::debug!( + "dirstate has changed since last read, not updating." + ); + return Ok(()); + } (map.pack_v1(parents)?, None) }; diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/utils.rs --- a/rust/hg-core/src/utils.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/utils.rs Thu Mar 02 19:02:52 2023 +0100 @@ -15,6 +15,7 @@ use std::fmt; use std::{io::Write, ops::Deref}; +pub mod debug; pub mod files; pub mod hg_path; pub mod path_auditor; diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/utils/debug.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/utils/debug.rs Thu Mar 02 19:02:52 2023 +0100 @@ -0,0 +1,87 @@ +//! Utils for debugging hg-core + +use crate::config::Config; + +/// Write the file path given by the config option `devel.` with +/// the suffix `.waiting`, then wait for the file path given by the +/// config option `devel.` to appear on disk +/// up to `devel.-timeout` seconds. +/// Note that the timeout may be higher because we scale it if global +/// `run-tests` timeouts are raised to prevent flakiness on slower hardware. +/// +/// Useful for testing race conditions. +pub fn debug_wait_for_file( + config: &Config, + config_option: &str, +) -> Result<(), String> { + let path_opt = format!("sync.{config_option}"); + let file_path = match config.get_str(b"devel", path_opt.as_bytes()).ok() { + Some(Some(file_path)) => file_path, + _ => return Ok(()), + }; + + // TODO make it so `configitems` is shared between Rust and Python so that + // defaults work out of the box, etc. + let default_timeout = 2; + let timeout_opt = format!("sync.{config_option}-timeout"); + let timeout_seconds = + match config.get_u32(b"devel", timeout_opt.as_bytes()) { + Ok(Some(timeout)) => timeout, + Err(e) => { + log::debug!("{e}"); + default_timeout + } + _ => default_timeout, + }; + let timeout_seconds = timeout_seconds as u64; + + log::debug!( + "Config option `{config_option}` found, \ + waiting for file `{file_path}` to be created" + ); + std::fs::File::create(format!("{file_path}.waiting")).ok(); + // If the test timeout have been extended, scale the timer relative + // to the normal timing. + let global_default_timeout: u64 = std::env::var("HGTEST_TIMEOUT_DEFAULT") + .map(|t| t.parse()) + .unwrap_or(Ok(0)) + .unwrap(); + let global_timeout_override: u64 = std::env::var("HGTEST_TIMEOUT") + .map(|t| t.parse()) + .unwrap_or(Ok(0)) + .unwrap(); + let timeout_seconds = if global_default_timeout < global_timeout_override { + timeout_seconds * global_timeout_override / global_default_timeout + } else { + timeout_seconds + }; + let timeout = std::time::Duration::from_secs(timeout_seconds); + + let start = std::time::Instant::now(); + let path = std::path::Path::new(file_path); + let mut found = false; + while start.elapsed() < timeout { + if path.exists() { + log::debug!("File `{file_path}` was created"); + found = true; + break; + } else { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + } + if !found { + let msg = format!( + "File `{file_path}` set by `{config_option}` was not found \ + within the allocated {timeout_seconds} seconds timeout" + ); + Err(msg) + } else { + Ok(()) + } +} + +pub fn debug_wait_for_file_or_print(config: &Config, config_option: &str) { + if let Err(e) = debug_wait_for_file(config, config_option) { + eprintln!("{e}"); + }; +} diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-core/src/vfs.rs --- a/rust/hg-core/src/vfs.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-core/src/vfs.rs Thu Mar 02 19:02:52 2023 +0100 @@ -172,3 +172,24 @@ pub(crate) fn is_file(path: impl AsRef) -> Result { Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_file())) } + +/// Returns whether the given `path` is on a network file system. +/// Taken from `cargo`'s codebase. +#[cfg(target_os = "linux")] +pub(crate) fn is_on_nfs_mount(path: impl AsRef) -> bool { + use std::ffi::CString; + use std::mem; + use std::os::unix::prelude::*; + + let path = match CString::new(path.as_ref().as_os_str().as_bytes()) { + Ok(path) => path, + Err(_) => return false, + }; + + unsafe { + let mut buf: libc::statfs = mem::zeroed(); + let r = libc::statfs(path.as_ptr(), &mut buf); + + r == 0 && buf.f_type as u32 == libc::NFS_SUPER_MAGIC as u32 + } +} diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Mar 02 19:02:52 2023 +0100 @@ -22,7 +22,8 @@ pybytes_deref::PyBytesDeref, }; use hg::{ - dirstate::StateMapIter, dirstate_tree::on_disk::DirstateV2ParseError, + dirstate::StateMapIter, dirstate_tree::dirstate_map::DirstateMapWriteMode, + dirstate_tree::on_disk::DirstateV2ParseError, dirstate_tree::owning::OwningDirstateMap, revlog::Node, utils::files::normalize_case, utils::hg_path::HgPath, DirstateEntry, DirstateError, DirstateParents, @@ -47,9 +48,10 @@ @staticmethod def new_v1( on_disk: PyBytes, + identity: Option, ) -> PyResult { let on_disk = PyBytesDeref::new(py, on_disk); - let (map, parents) = OwningDirstateMap::new_v1(on_disk) + let (map, parents) = OwningDirstateMap::new_v1(on_disk, identity) .map_err(|e| dirstate_error(py, e))?; let map = Self::create_instance(py, map)?; let p1 = PyBytes::new(py, parents.p1.as_bytes()); @@ -64,18 +66,33 @@ on_disk: PyBytes, data_size: usize, tree_metadata: PyBytes, + uuid: PyBytes, + identity: Option, ) -> PyResult { let dirstate_error = |e: DirstateError| { PyErr::new::(py, format!("Dirstate error: {:?}", e)) }; let on_disk = PyBytesDeref::new(py, on_disk); + let uuid = uuid.data(py); let map = OwningDirstateMap::new_v2( - on_disk, data_size, tree_metadata.data(py), + on_disk, + data_size, + tree_metadata.data(py), + uuid.to_owned(), + identity, ).map_err(dirstate_error)?; let map = Self::create_instance(py, map)?; Ok(map.into_object()) } + /// Returns an empty DirstateMap. Only used for a new dirstate. + @staticmethod + def new_empty() -> PyResult { + let map = OwningDirstateMap::new_empty(vec![]); + let map = Self::create_instance(py, map)?; + Ok(map.into_object()) + } + def clear(&self) -> PyResult { self.inner(py).borrow_mut().clear(); Ok(py.None()) @@ -236,10 +253,16 @@ /// instead of written to a new data file (False). def write_v2( &self, - can_append: bool, + write_mode: usize, ) -> PyResult { let inner = self.inner(py).borrow(); - let result = inner.pack_v2(can_append); + let rust_write_mode = match write_mode { + 0 => DirstateMapWriteMode::Auto, + 1 => DirstateMapWriteMode::ForceNewDataFile, + 2 => DirstateMapWriteMode::ForceAppend, + _ => DirstateMapWriteMode::Auto, // XXX should we error out? + }; + let result = inner.pack_v2(rust_write_mode); match result { Ok((packed, tree_metadata, append, _old_data_size)) => { let packed = PyBytes::new(py, &packed); diff -r 2fbc109fd58a -r a6b8b1ab9116 rust/rhg/src/commands/status.rs --- a/rust/rhg/src/commands/status.rs Thu Mar 02 04:16:47 2023 +0100 +++ b/rust/rhg/src/commands/status.rs Thu Mar 02 19:02:52 2023 +0100 @@ -21,6 +21,7 @@ use hg::manifest::Manifest; use hg::matchers::{AlwaysMatcher, IntersectionMatcher}; use hg::repo::Repo; +use hg::utils::debug::debug_wait_for_file; use hg::utils::files::get_bytes_from_os_string; use hg::utils::files::get_path_from_bytes; use hg::utils::hg_path::{hg_path_to_path_buf, HgPath}; @@ -308,26 +309,46 @@ .unsure .into_par_iter() .map(|to_check| { - unsure_is_modified( + // The compiler seems to get a bit confused with complex + // inference when using a parallel iterator + map + // + map_err + collect, so let's just inline some of the + // logic. + match unsure_is_modified( working_directory_vfs, store_vfs, check_exec, &manifest, &to_check.path, - ) - .map(|modified| (to_check, modified)) + ) { + Err(HgError::IoError { .. }) => { + // IO errors most likely stem from the file being + // deleted even though we know it's in the + // dirstate. + Ok((to_check, UnsureOutcome::Deleted)) + } + Ok(outcome) => Ok((to_check, outcome)), + Err(e) => Err(e), + } }) .collect::>()?; - for (status_path, is_modified) in res.into_iter() { - if is_modified { - if display_states.modified { - ds_status.modified.push(status_path); + for (status_path, outcome) in res.into_iter() { + match outcome { + UnsureOutcome::Clean => { + if display_states.clean { + ds_status.clean.push(status_path.clone()); + } + fixup.push(status_path.path.into_owned()) } - } else { - if display_states.clean { - ds_status.clean.push(status_path.clone()); + UnsureOutcome::Modified => { + if display_states.modified { + ds_status.modified.push(status_path); + } } - fixup.push(status_path.path.into_owned()) + UnsureOutcome::Deleted => { + if display_states.deleted { + ds_status.deleted.push(status_path); + } + } } } } @@ -401,6 +422,13 @@ after_status, )?; + // Development config option to test write races + if let Err(e) = + debug_wait_for_file(config, "status.pre-dirstate-write-file") + { + ui.write_stderr(e.as_bytes()).ok(); + } + if (fixup.is_empty() || filesystem_time_at_status_start.is_none()) && !dirstate_write_needed { @@ -420,9 +448,21 @@ // `unsure_is_clean` which was needed before reading // contents. Here we access metadata again after reading // content, in case it changed in the meantime. - let fs_metadata = repo + let metadata_res = repo .working_directory_vfs() - .symlink_metadata(&fs_path)?; + .symlink_metadata(&fs_path); + let fs_metadata = match metadata_res { + Ok(meta) => meta, + Err(err) => match err { + HgError::IoError { .. } => { + // The file has probably been deleted. In any + // case, it was in the dirstate before, so + // let's ignore the error. + continue; + } + _ => return Err(err.into()), + }, + }; if let Some(mtime) = TruncatedTimestamp::for_reliable_mtime_of( &fs_metadata, @@ -449,6 +489,7 @@ // Not updating the dirstate is not ideal but not critical: // don’t keep our caller waiting until some other Mercurial // process releases the lock. + log::info!("not writing dirstate from `status`: lock is held") } Err(LockError::Other(HgError::IoError { error, .. })) if error.kind() == io::ErrorKind::PermissionDenied => @@ -528,6 +569,16 @@ } } +/// Outcome of the additional check for an ambiguous tracked file +enum UnsureOutcome { + /// The file is actually clean + Clean, + /// The file has been modified + Modified, + /// The file was deleted on disk (or became another type of fs entry) + Deleted, +} + /// Check if a file is modified by comparing actual repo store and file system. /// /// This meant to be used for those that the dirstate cannot resolve, due @@ -538,7 +589,7 @@ check_exec: bool, manifest: &Manifest, hg_path: &HgPath, -) -> Result { +) -> Result { let vfs = working_directory_vfs; let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion"); let fs_metadata = vfs.symlink_metadata(&fs_path)?; @@ -567,7 +618,7 @@ }; if entry_flags != fs_flags { - return Ok(true); + return Ok(UnsureOutcome::Modified); } let filelog = hg::filelog::Filelog::open_vfs(&store_vfs, hg_path)?; let fs_len = fs_metadata.len(); @@ -581,7 +632,7 @@ if filelog_entry.file_data_len_not_equal_to(fs_len) { // No need to read file contents: // it cannot be equal if it has a different length. - return Ok(true); + return Ok(UnsureOutcome::Modified); } let p1_filelog_data = filelog_entry.data()?; @@ -589,7 +640,7 @@ if p1_contents.len() as u64 != fs_len { // No need to read file contents: // it cannot be equal if it has a different length. - return Ok(true); + return Ok(UnsureOutcome::Modified); } let fs_contents = if is_symlink { @@ -597,5 +648,10 @@ } else { vfs.read(fs_path)? }; - Ok(p1_contents != &*fs_contents) + + Ok(if p1_contents != &*fs_contents { + UnsureOutcome::Modified + } else { + UnsureOutcome::Clean + }) } diff -r 2fbc109fd58a -r a6b8b1ab9116 tests/run-tests.py --- a/tests/run-tests.py Thu Mar 02 04:16:47 2023 +0100 +++ b/tests/run-tests.py Thu Mar 02 19:02:52 2023 +0100 @@ -1831,8 +1831,44 @@ pos = prepos = -1 - # True or False when in a true or false conditional section - skipping = None + # The current stack of conditionnal section. + # Each relevant conditionnal section can have the following value: + # - True: we should run this block + # - False: we should skip this block + # - None: The parent block is skipped, + # (no branch of this one will ever run) + condition_stack = [] + + def run_line(): + """return True if the current line should be run""" + if not condition_stack: + return True + return bool(condition_stack[-1]) + + def push_conditional_block(should_run): + """Push a new conditional context, with its initial state + + i.e. entry a #if block""" + if not run_line(): + condition_stack.append(None) + else: + condition_stack.append(should_run) + + def flip_conditional(): + """reverse the current condition state + + i.e. enter a #else + """ + assert condition_stack + if condition_stack[-1] is not None: + condition_stack[-1] = not condition_stack[-1] + + def pop_conditional(): + """exit the current skipping context + + i.e. reach the #endif""" + assert condition_stack + condition_stack.pop() # We keep track of whether or not we're in a Python block so we # can generate the surrounding doctest magic. @@ -1888,7 +1924,7 @@ after.setdefault(pos, []).append( b' !!! invalid #require\n' ) - if not skipping: + if run_line(): haveresult, message = self._hghave(lsplit[1:]) if not haveresult: script = [b'echo "%s"\nexit 80\n' % message] @@ -1898,21 +1934,19 @@ lsplit = l.split() if len(lsplit) < 2 or lsplit[0] != b'#if': after.setdefault(pos, []).append(b' !!! invalid #if\n') - if skipping is not None: - after.setdefault(pos, []).append(b' !!! nested #if\n') - skipping = not self._iftest(lsplit[1:]) + push_conditional_block(self._iftest(lsplit[1:])) after.setdefault(pos, []).append(l) elif l.startswith(b'#else'): - if skipping is None: + if not condition_stack: after.setdefault(pos, []).append(b' !!! missing #if\n') - skipping = not skipping + flip_conditional() after.setdefault(pos, []).append(l) elif l.startswith(b'#endif'): - if skipping is None: + if not condition_stack: after.setdefault(pos, []).append(b' !!! missing #if\n') - skipping = None + pop_conditional() after.setdefault(pos, []).append(l) - elif skipping: + elif not run_line(): after.setdefault(pos, []).append(l) elif l.startswith(b' >>> '): # python inlines after.setdefault(pos, []).append(l) @@ -1957,7 +1991,7 @@ if inpython: script.append(b'EOF\n') - if skipping is not None: + if condition_stack: after.setdefault(pos, []).append(b' !!! missing #endif\n') addsalt(n + 1, False) # Need to end any current per-command trace diff -r 2fbc109fd58a -r a6b8b1ab9116 tests/test-dirstate-read-race.t --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-dirstate-read-race.t Thu Mar 02 19:02:52 2023 +0100 @@ -0,0 +1,410 @@ +============================================================================== +Check potential race conditions between a dirstate's read and other operations +============================================================================== + +#testcases dirstate-v1 dirstate-v2-append dirstate-v2-rewrite +#testcases pre-all-read pre-some-read + +Some commands, like `hg status`, do not need to take the wlock but need to +access dirstate data. +Other commands might update the dirstate data while this happens. + +This can create issues if repository data is read in the wrong order, or for +the dirstate-v2 format where the data is contained in multiple files. + +This test file is meant to test various cases where such parallel operations +happen and make sure the reading process behaves fine. We do so with a `hg +status` command since it is probably the most advanced of such read-only +command. + +It bears simililarity with `tests/test-dirstate-status-race.t ` but tests a +different type of race. + +Setup +===== + + $ cat >> $HGRCPATH << EOF + > [storage] + > dirstate-v2.slow-path=allow + > EOF + +#if no-dirstate-v1 + $ cat >> $HGRCPATH << EOF + > [format] + > use-dirstate-v2=yes + > EOF +#else + $ cat >> $HGRCPATH << EOF + > [format] + > use-dirstate-v2=no + > EOF +#endif + +#if dirstate-v2-rewrite + $ d2args="--config devel.dirstate.v2.data_update_mode=force-new" +#endif +#if dirstate-v2-append + $ d2args="--config devel.dirstate.v2.data_update_mode=force-append" +#endif + + +#if dirstate-v1 + $ cfg="devel.sync.dirstate.pre-read-file" +#else +#if pre-all-read + $ cfg="devel.sync.dirstate.pre-read-file" +#else + $ cfg="devel.sync.dirstate.post-docket-read-file" +#endif +#endif + + $ directories="dir dir/nested dir2" + $ first_files="dir/nested/a dir/b dir/c dir/d dir2/e f" + $ second_files="g dir/nested/h dir/i dir/j dir2/k dir2/l dir/nested/m" + $ extra_files="dir/n dir/o p q" + + $ hg init reference-repo + $ cd reference-repo + $ mkdir -p dir/nested dir2 + $ touch -t 200001010000 $first_files $directories + $ hg commit -Aqm "recreate a bunch of files to facilitate dirstate-v2 append" + $ touch -t 200001010010 $second_files $directories + $ hg commit -Aqm "more files to have two commit" + $ hg log -G -v + @ changeset: 1:9a86dcbfb938 + | tag: tip + | user: test + | date: Thu Jan 01 00:00:00 1970 +0000 + | files: dir/i dir/j dir/nested/h dir/nested/m dir2/k dir2/l g + | description: + | more files to have two commit + | + | + o changeset: 0:4f23db756b09 + user: test + date: Thu Jan 01 00:00:00 1970 +0000 + files: dir/b dir/c dir/d dir/nested/a dir2/e f + description: + recreate a bunch of files to facilitate dirstate-v2 append + + + $ hg manifest + dir/b + dir/c + dir/d + dir/i + dir/j + dir/nested/a + dir/nested/h + dir/nested/m + dir2/e + dir2/k + dir2/l + f + g + +Add some unknown files and refresh the dirstate + + $ touch -t 200001010020 $extra_files + $ hg add dir/o + $ hg remove dir/nested/m + + $ hg st --config devel.dirstate.v2.data_update_mode=force-new + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ hg debugstate + n 644 0 2000-01-01 00:00:00 dir/b + n 644 0 2000-01-01 00:00:00 dir/c + n 644 0 2000-01-01 00:00:00 dir/d + n 644 0 2000-01-01 00:10:00 dir/i + n 644 0 2000-01-01 00:10:00 dir/j + n 644 0 2000-01-01 00:00:00 dir/nested/a + n 644 0 2000-01-01 00:10:00 dir/nested/h + r ?????????????????????????????????? dir/nested/m (glob) + a ?????????????????????????????????? dir/o (glob) + n 644 0 2000-01-01 00:00:00 dir2/e + n 644 0 2000-01-01 00:10:00 dir2/k + n 644 0 2000-01-01 00:10:00 dir2/l + n 644 0 2000-01-01 00:00:00 f + n 644 0 2000-01-01 00:10:00 g + $ hg debugstate > ../reference + $ cd .. + +Actual Testing +============== + +Race with a `hg add` +------------------- + + $ cp -a reference-repo race-with-add + $ cd race-with-add + +spin a `hg status` with some caches to update + + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config ${cfg}=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +Add a file + + $ hg $d2args add dir/n + $ touch $TESTTMP/status-race-lock + $ wait + +The file should in a "added" state + + $ hg status + A dir/n + A dir/o + R dir/nested/m + ? p + ? q + +The status process should return a consistent result and not crash. + +#if dirstate-v1 + $ cat $TESTTMP/status-race-lock.out + A dir/n + A dir/o + R dir/nested/m + ? p + ? q +#else +#if rhg pre-some-read dirstate-v2-append + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q +#else +#if rust no-rhg dirstate-v2-append + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q +#else + $ cat $TESTTMP/status-race-lock.out + A dir/n + A dir/o + R dir/nested/m + ? p + ? q +#endif +#endif +#endif + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with a `hg commit` +----------------------- + + $ cp -a reference-repo race-with-commit + $ cd race-with-commit + +spin a `hg status with some cache to update + + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config ${cfg}=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +Add a do a commit + + $ hg status + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ hg $d2args commit -m 'racing commit' + $ touch $TESTTMP/status-race-lock + $ wait + +commit was created, and status is now clean + + $ hg log -GT '{node|short} {desc}\n' + @ 02a67a77ee9b racing commit + | + o 9a86dcbfb938 more files to have two commit + | + o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append + + $ hg status + ? dir/n + ? p + ? q + +The status process should return a consistent result and not crash. + +#if rust no-rhg dirstate-v2-append + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log +#else +#if rhg pre-some-read dirstate-v2-append + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log +#else + $ cat $TESTTMP/status-race-lock.out + M dir/o (no-rhg known-bad-output !) + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + warning: ignoring unknown working parent 02a67a77ee9b! (no-rhg !) +#endif +#endif + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with a `hg update` +----------------------- + + $ cp -a reference-repo race-with-update + $ cd race-with-update + +spin a `hg status` with some caches to update + + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config ${cfg}=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting +do an update + + $ hg status + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ hg log -GT '{node|short} {desc}\n' + @ 9a86dcbfb938 more files to have two commit + | + o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append + + $ hg $d2args update --merge ".~1" + 0 files updated, 0 files merged, 6 files removed, 0 files unresolved + $ touch $TESTTMP/status-race-lock + $ wait + $ hg log -GT '{node|short} {desc}\n' + o 9a86dcbfb938 more files to have two commit + | + @ 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append + + $ hg status + A dir/o + ? dir/n + ? p + ? q + +The status process should return a consistent result and not crash. + +#if rhg dirstate-v2-append pre-some-read + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ! dir/i + ! dir/j + ! dir/nested/h + ! dir2/k + ! dir2/l + ! g + ? dir/n + ? p + ? q +#else +#if rust no-rhg dirstate-v2-append + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ! dir/i + ! dir/j + ! dir/nested/h + ! dir2/k + ! dir2/l + ! g + ? dir/n + ? p + ? q +#else + $ cat $TESTTMP/status-race-lock.out + A dir/o + ? dir/n + ? p + ? q +#endif +#endif + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with a cache updating `hg status` +-------------------------------------- + +It is interesting to race with "read-only" operation (that still update its cache) + + $ cp -a reference-repo race-with-status + $ cd race-with-status + +spin a `hg status` with some caches to update + + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config ${cfg}=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting +do an update + + $ touch -t 200001020006 f + $ hg $d2args status + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ touch $TESTTMP/status-race-lock + $ wait + +The status process should return a consistent result and not crash. + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. diff -r 2fbc109fd58a -r a6b8b1ab9116 tests/test-dirstate-status-write-race.t --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-dirstate-status-write-race.t Thu Mar 02 19:02:52 2023 +0100 @@ -0,0 +1,460 @@ +===================================================================== +Check potential race conditions between a status and other operations +===================================================================== + +#testcases dirstate-v1 dirstate-v2-append dirstate-v2-rewrite + +The `hg status` command can run without the wlock, however it might end up +having to update the on-disk dirstate files, for example to mark ambiguous +files as clean, or to update directory caches information with dirstate-v2. + + +If another process updates the dirstate in the meantime we might run into +trouble. Especially, commands doing semantic changes like `hg add` or + `hg commit` should not see their update erased by a concurrent status. + +Unlike commands like `add` or `commit`, `status` only writes the dirstate +to update caches, no actual information is lost if we fail to write to disk. + + +This test file is meant to test various cases where such parallel operations +between a status with reasons to update the dirstate and another semantic +changes happen. + + +Setup +===== + + $ cat >> $HGRCPATH << EOF + > [storage] + > dirstate-v2.slow-path=allow + > EOF + +#if no-dirstate-v1 + $ cat >> $HGRCPATH << EOF + > [format] + > use-dirstate-v2=yes + > EOF +#else + $ cat >> $HGRCPATH << EOF + > [format] + > use-dirstate-v2=no + > EOF +#endif + +#if dirstate-v2-rewrite + $ d2args="--config devel.dirstate.v2.data_update_mode=force-new" +#endif +#if dirstate-v2-append + $ d2args="--config devel.dirstate.v2.data_update_mode=force-append" +#endif + + $ directories="dir dir/nested dir2" + $ first_files="dir/nested/a dir/b dir/c dir/d dir2/e f" + $ second_files="g dir/nested/h dir/i dir/j dir2/k dir2/l dir/nested/m" + $ extra_files="dir/n dir/o p q" + + $ hg init reference-repo + $ cd reference-repo + $ mkdir -p dir/nested dir2 + $ touch -t 200001010000 $first_files $directories + $ hg commit -Aqm "recreate a bunch of files to facilitate dirstate-v2 append" + $ touch -t 200001010010 $second_files $directories + $ hg commit -Aqm "more files to have two commits" + $ hg log -G -v + @ changeset: 1:c349430a1631 + | tag: tip + | user: test + | date: Thu Jan 01 00:00:00 1970 +0000 + | files: dir/i dir/j dir/nested/h dir/nested/m dir2/k dir2/l g + | description: + | more files to have two commits + | + | + o changeset: 0:4f23db756b09 + user: test + date: Thu Jan 01 00:00:00 1970 +0000 + files: dir/b dir/c dir/d dir/nested/a dir2/e f + description: + recreate a bunch of files to facilitate dirstate-v2 append + + + $ hg manifest + dir/b + dir/c + dir/d + dir/i + dir/j + dir/nested/a + dir/nested/h + dir/nested/m + dir2/e + dir2/k + dir2/l + f + g + +Add some unknown files and refresh the dirstate + + $ touch -t 200001010020 $extra_files + $ hg add dir/o + $ hg remove dir/nested/m + + $ hg st --config devel.dirstate.v2.data_update_mode=force-new + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ hg debugstate + n 644 0 2000-01-01 00:00:00 dir/b + n 644 0 2000-01-01 00:00:00 dir/c + n 644 0 2000-01-01 00:00:00 dir/d + n 644 0 2000-01-01 00:10:00 dir/i + n 644 0 2000-01-01 00:10:00 dir/j + n 644 0 2000-01-01 00:00:00 dir/nested/a + n 644 0 2000-01-01 00:10:00 dir/nested/h + r ?????????????????????????????????? dir/nested/m (glob) + a ?????????????????????????????????? dir/o (glob) + n 644 0 2000-01-01 00:00:00 dir2/e + n 644 0 2000-01-01 00:10:00 dir2/k + n 644 0 2000-01-01 00:10:00 dir2/l + n 644 0 2000-01-01 00:00:00 f + n 644 0 2000-01-01 00:10:00 g + $ hg debugstate > ../reference + $ cd .. + +Explain / verify the test principles +------------------------------------ + +First, we can properly copy the reference + + $ cp -a reference-repo sanity-check + $ cd sanity-check + $ hg debugstate + n 644 0 2000-01-01 00:00:00 dir/b + n 644 0 2000-01-01 00:00:00 dir/c + n 644 0 2000-01-01 00:00:00 dir/d + n 644 0 2000-01-01 00:10:00 dir/i + n 644 0 2000-01-01 00:10:00 dir/j + n 644 0 2000-01-01 00:00:00 dir/nested/a + n 644 0 2000-01-01 00:10:00 dir/nested/h + r ?????????????????????????????????? dir/nested/m (glob) + a ?????????????????????????????????? dir/o (glob) + n 644 0 2000-01-01 00:00:00 dir2/e + n 644 0 2000-01-01 00:10:00 dir2/k + n 644 0 2000-01-01 00:10:00 dir2/l + n 644 0 2000-01-01 00:00:00 f + n 644 0 2000-01-01 00:10:00 g + $ hg debugstate > ../post-copy + $ diff ../reference ../post-copy + +And status thinks the cache is in a proper state + + $ hg st + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ hg debugstate + n 644 0 2000-01-01 00:00:00 dir/b + n 644 0 2000-01-01 00:00:00 dir/c + n 644 0 2000-01-01 00:00:00 dir/d + n 644 0 2000-01-01 00:10:00 dir/i + n 644 0 2000-01-01 00:10:00 dir/j + n 644 0 2000-01-01 00:00:00 dir/nested/a + n 644 0 2000-01-01 00:10:00 dir/nested/h + r ?????????????????????????????????? dir/nested/m (glob) + a ?????????????????????????????????? dir/o (glob) + n 644 0 2000-01-01 00:00:00 dir2/e + n 644 0 2000-01-01 00:10:00 dir2/k + n 644 0 2000-01-01 00:10:00 dir2/l + n 644 0 2000-01-01 00:00:00 f + n 644 0 2000-01-01 00:10:00 g + $ hg debugstate > ../post-status + $ diff ../reference ../post-status + +Then we can start a status that: +- has some update to do (the touch call) +- will wait AFTER running status, but before updating the cache on disk + + $ touch -t 200001010001 dir/c + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +We check it runs the status first by modifying a file and updating another timestamp + + $ touch -t 200001010003 dir/i + $ echo babar > dir/j + $ touch $TESTTMP/status-race-lock + $ wait + +The test process should have reported a status before the change we made, +and should have missed the timestamp update + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + $ hg debugstate | grep dir/c + n 644 0 2000-01-01 00:01:00 dir/c + $ hg debugstate | grep dir/i + n 644 0 2000-01-01 00:10:00 dir/i + $ hg debugstate | grep dir/j + n 644 0 2000-01-01 00:10:00 dir/j + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Actual Testing +============== + +Race with a `hg add` +------------------- + + $ cp -a reference-repo race-with-add + $ cd race-with-add + +spin a `hg status` with some caches to update + + $ touch -t 200001020001 f + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +Add a file + + $ hg $d2args add dir/n + $ touch $TESTTMP/status-race-lock + $ wait + +The file should in a "added" state + + $ hg status + A dir/n + A dir/o + R dir/nested/m + ? p + ? q + +The status process should return a consistent result and not crash. + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with a `hg commit` +---------------------- + + $ cp -a reference-repo race-with-commit + $ cd race-with-commit + +spin a `hg status` with some caches to update + + $ touch -t 200001020001 dir/j + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +Add a file and force the data file rewrite + + $ hg $d2args commit -m created-during-status dir/o + $ touch $TESTTMP/status-race-lock + $ wait + +The parent must change and the status should be clean + + $ hg summary + parent: 2:2e3b442a2fd4 tip + created-during-status + branch: default + commit: 1 removed, 3 unknown + update: (current) + phases: 3 draft + $ hg status + R dir/nested/m + ? dir/n + ? p + ? q + +The status process should return a consistent result and not crash. + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with a `hg update` +---------------------- + + $ cp -a reference-repo race-with-update + $ cd race-with-update + +spin a `hg status` with some caches to update + + $ touch -t 200001020001 dir2/k + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +Add a file and force the data file rewrite + + $ hg $d2args update ".~1" + 0 files updated, 0 files merged, 6 files removed, 0 files unresolved + $ touch $TESTTMP/status-race-lock + $ wait + +The parent must change and the status should be clean + + $ hg summary + parent: 0:4f23db756b09 + recreate a bunch of files to facilitate dirstate-v2 append + branch: default + commit: 1 added, 3 unknown (new branch head) + update: 1 new changesets (update) + phases: 2 draft + $ hg status + A dir/o + ? dir/n + ? p + ? q + +The status process should return a consistent result and not crash. + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with another status +------------------------ + + $ cp -a reference-repo race-with-status + $ cd race-with-status + +spin a `hg status` with some caches to update + + $ touch -t 200001010030 dir/nested/h + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +touch g + + $ touch -t 200001010025 g + $ hg $d2args status + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ touch $TESTTMP/status-race-lock + $ wait + +the first update should be on disk + + $ hg debugstate --all | grep "g" + n 644 0 2000-01-01 00:25:00 g + +The status process should return a consistent result and not crash. + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. + +Race with the removal of an ambiguous file +----------------------è------------------- + + $ cp -a reference-repo race-with-remove + $ cd race-with-remove + +spin a `hg status` with some caches to update + + $ touch -t 200001010035 dir2/l + $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \ + > --config rhg.on-unsupported=abort \ + > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \ + > & + $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting + +remove that same file + + $ hg $d2args remove dir2/l + $ touch $TESTTMP/status-race-lock + $ wait + +file should be marked as removed + + $ hg status + A dir/o + R dir/nested/m + R dir2/l + ? dir/n + ? p + ? q + +The status process should return a consistent result and not crash. + + $ cat $TESTTMP/status-race-lock.out + A dir/o + R dir/nested/m + ? dir/n + ? p + ? q + $ cat $TESTTMP/status-race-lock.log + +final cleanup + + $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting + $ cd .. diff -r 2fbc109fd58a -r a6b8b1ab9116 tests/test-dirstate.t --- a/tests/test-dirstate.t Thu Mar 02 04:16:47 2023 +0100 +++ b/tests/test-dirstate.t Thu Mar 02 19:02:52 2023 +0100 @@ -148,7 +148,7 @@ $ hg init append-mostly $ cd append-mostly $ mkdir dir dir2 - $ touch dir/a dir/b dir/c dir/d dir/e dir2/f + $ touch -t 200001010000 dir/a dir/b dir/c dir/d dir/e dir2/f dir dir2 $ hg commit -Aqm initial $ hg st $ dirstate_data_files | wc -l @@ -163,12 +163,11 @@ $ dirstate_uuid_has_not_changed not testing because using Python implementation (no-rust no-rhg !) -Trigger an append with a small change +Trigger an append with a small change to directory mtime $ current_data_size=$(find_dirstate_data_size) - $ rm dir2/f + $ touch -t 201001010000 dir2 $ hg st - ! dir2/f $ dirstate_data_files | wc -l *1 (re) $ dirstate_uuid_has_not_changed @@ -187,7 +186,6 @@ Trigger a rust/rhg run which updates the unused bytes value $ hg st A file - ! dir2/f $ dirstate_data_files | wc -l *1 (re) $ dirstate_uuid_has_not_changed @@ -213,8 +211,223 @@ #endif +(non-Rust always rewrites) + +Test the devel option to control write behavior +============================================== + +Sometimes, debugging or testing the dirstate requires making sure that we have +done a complete rewrite of the data file and have no unreachable data around, +sometimes it requires we ensure we don't. + +We test the option to force this rewrite by creating the situation where an +append would happen and check that it doesn't happen. + + $ cd .. + $ hg init force-base + $ cd force-base + $ mkdir -p dir/nested dir2 + $ touch -t 200001010000 f dir/nested/a dir/b dir/c dir/d dir2/e dir/nested dir dir2 + $ hg commit -Aqm "recreate a bunch of files to facilitate append" + $ hg st --config devel.dirstate.v2.data_update_mode=force-new + $ cd .. + +#if dirstate-v2 + $ hg -R force-base debugstate --docket | grep unused + number of unused bytes: 0 + +Check with the option in "auto" mode +------------------------------------ + $ cp -a force-base append-mostly-no-force-rewrite + $ cd append-mostly-no-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of dir on disk which will be recorded, causing a small enough change +to warrant only an append + + $ touch -t 202212010000 dir2 + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=auto + +UUID hasn't changed and a non-zero number of unused bytes means we've appended + + $ dirstate_uuid_has_not_changed + not testing because using Python implementation (no-rust no-rhg !) + +#if no-rust no-rhg +The pure python implementation never appends at the time this is written. + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) +#else + $ hg debugstate --docket | grep unused + number of unused bytes: [1-9]\d* (re) +#endif + $ cd .. + +Check the same scenario with the option set to "force-new" +--------------------------------------------------------- + + $ cp -a force-base append-mostly-force-rewrite + $ cd append-mostly-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of dir on disk which will be recorded, causing a small enough change +to warrant only an append, but we force the rewrite + + $ touch -t 202212010000 dir2 + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-new + +UUID has changed and zero unused bytes means a full-rewrite happened + + +#if no-rust no-rhg + $ dirstate_uuid_has_not_changed + not testing because using Python implementation +#else + $ dirstate_uuid_has_not_changed + [1] +#endif + $ hg debugstate --docket | grep unused + number of unused bytes: 0 + $ cd .. + + +Check the same scenario with the option set to "force-append" +------------------------------------------------------------- + +(should behave the same as "auto" here) + + $ cp -a force-base append-mostly-force-append + $ cd append-mostly-force-append + $ current_uid=$(find_dirstate_uuid) + +Change mtime of dir on disk which will be recorded, causing a small enough change +to warrant only an append, which we are forcing here anyway. + + $ touch -t 202212010000 dir2 + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-append + +UUID has not changed and some unused bytes exist in the data file + + $ dirstate_uuid_has_not_changed + not testing because using Python implementation (no-rust no-rhg !) + +#if no-rust no-rhg +The pure python implementation never appends at the time this is written. + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) +#else + $ hg debugstate --docket | grep unused + number of unused bytes: [1-9]\d* (re) +#endif + $ cd .. + +Check with the option in "auto" mode +------------------------------------ + $ cp -a force-base append-mostly-no-force-rewrite + $ cd append-mostly-no-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of everything on disk causing a full rewrite + + $ touch -t 202212010005 `hg files` + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=auto + +UUID has changed and zero unused bytes means we've rewritten. + +#if no-rust no-rhg + $ dirstate_uuid_has_not_changed + not testing because using Python implementation +#else + $ dirstate_uuid_has_not_changed + [1] +#endif + + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) + $ cd .. + +Check the same scenario with the option set to "force-new" +--------------------------------------------------------- + +(should be the same as auto) + + $ cp -a force-base append-mostly-force-rewrite + $ cd append-mostly-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of everything on disk causing a full rewrite + + $ touch -t 202212010005 `hg files` + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-new + +UUID has changed and a zero number unused bytes means we've rewritten. + + +#if no-rust no-rhg + $ dirstate_uuid_has_not_changed + not testing because using Python implementation +#else + $ dirstate_uuid_has_not_changed + [1] +#endif + $ hg debugstate --docket | grep unused + number of unused bytes: 0 + $ cd .. + + +Check the same scenario with the option set to "force-append" +------------------------------------------------------------- + +Should append even if "auto" did not + + $ cp -a force-base append-mostly-force-append + $ cd append-mostly-force-append + $ current_uid=$(find_dirstate_uuid) + +Change mtime of everything on disk causing a full rewrite + + $ touch -t 202212010005 `hg files` + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-append + +UUID has not changed and some unused bytes exist in the data file + + $ dirstate_uuid_has_not_changed + not testing because using Python implementation (no-rust no-rhg !) + +#if no-rust no-rhg +The pure python implementation is never appending at the time this is written. + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) +#else + $ hg debugstate --docket | grep unused + number of unused bytes: [1-9]\d* (re) +#endif + $ cd .. + + + +Get back into a state suitable for the test of the file. + + $ cd ./append-mostly + +#else + $ cd ./u +#endif + Transaction compatibility -------------------------- +========================= The transaction preserves the dirstate. We should make sure all of it (docket + data) is preserved diff -r 2fbc109fd58a -r a6b8b1ab9116 tests/test-patchbomb.t --- a/tests/test-patchbomb.t Thu Mar 02 04:16:47 2023 +0100 +++ b/tests/test-patchbomb.t Thu Mar 02 19:02:52 2023 +0100 @@ -183,6 +183,43 @@ +a +Test --git is respected + $ hg email --git --date '1970-1-1 0:1' -n -f quux -t foo -c bar -r tip + this patch series consists of 1 patches. + + + displaying [PATCH] a ... + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: [PATCH] a + X-Mercurial-Node: 8580ff50825a50c8f716709acdf8de0deddcd6ab + X-Mercurial-Series-Index: 1 + X-Mercurial-Series-Total: 1 + Message-Id: <8580ff50825a50c8f716.60@test-hostname> + X-Mercurial-Series-Id: <8580ff50825a50c8f716.60@test-hostname> + User-Agent: Mercurial-patchbomb/* (glob) + Date: Thu, 01 Jan 1970 00:01:00 +0000 + From: quux + To: foo + Cc: bar + + # HG changeset patch + # User test + # Date 1 0 + # Thu Jan 01 00:00:01 1970 +0000 + # Node ID 8580ff50825a50c8f716709acdf8de0deddcd6ab + # Parent 0000000000000000000000000000000000000000 + a + + diff --git a/a b/a + new file mode 100644 + --- /dev/null + +++ b/a + @@ -0,0 +1,1 @@ + +a + + Test breaking format changes aren't $ hg --config diff.noprefix=True email --date '1970-1-1 0:1' -n -f quux -t foo -c bar -r tip