branching: merge stable into default
The clippy god had to be appeased on some aspect.
--- a/.hgsigs Thu Mar 02 04:16:47 2023 +0100
+++ b/.hgsigs Thu Mar 02 19:02:52 2023 +0100
@@ -238,3 +238,4 @@
04f1dba53c961dfdb875c8469adc96fa999cfbed 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmNyC5sZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVqF+C/4uLaV/4nizZkWD3PjU1WyFYDg4bWDFOHb+PWuQ/3uoHXu1/EaYRnqmcDyOSJ99aXZBQ78rm9xhjxdmbklZ4ll1EGkqfTiYH+ld+rqE8iaqlc/DVy7pFXaenYwxletzO1OezzwF4XDLi6hcqzY9CXA3NM40vf6W4Rs5bEIi4eSbgJSNB1ll6ZzjvkU5bWTUoxSH+fxIJUuo27El2etdlKFQkS3/oTzWHejpVn6SQ1KyojTHMQBDRK4rqJBISp3gTf4TEezb0q0HTutJYDFdQNIRqx7V1Ao4Ei+YNbenJzcWJOA/2uk4V0AvZ4tnjgAzBYKwvIL1HfoQ0OmILeXjlVzV7Xu0G57lavum0sKkz/KZLKyYhKQHjYQLE7YMSM2y6/UEoFNN577vB47CHUq446PSMb8dGs2rmj66rj4iz5ml0yX+V9O2PpmIKoPAu1Y5/6zB9rCL76MRx182IW2m3rm4lsTfXPBPtea/OFt6ylxqCJRxaA0pht4FiAOvicPKXh4=
c890d8b8bc59b18e5febf60caada629df5356ee2 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmN48sEZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVqwwC/9GkaE5adkLaJBZeRqfLL710ZPMAttiPhLAYl9YcUeUjw2rTU1bxxUks0oSfW4J0AaJLscl+pG4zZW8FN2MXY3njdcpAA/bv4nb+rq50Mdm0mD3iLOyKbIDQbUoYe7YpIPbpyuf8G/y4R1IXiLJjK329vzIsHkqyKPwUzxvyfZkjg6Lx00RRcfWrosb2Jb0+EhP9Yi7tjJmNWjsaTb8Ufp+ImYAL3qcDErkqb6wJCGAM0AwVfAJ7MZz3v3E56n1HTPhNqf8UvfR4URsuDlk56mP4do/QThC7dANiKeWrFJSBPu8uSpaHzUk1XCat0RHK03DMr15Ln1YCEhTmaedHr2rtp0fgGqaMH1jLZt0+9fiPaaYjck7Y+aagdc3bt1VhqtClbCJz5KWynpCLrn8MX40QmXuwly+KHzMuPQ6i0ui95ifgtrW7/Zd7uI7mYZ2zUeFUZPnL9XmGpFI595N8TjoPuFeO/ea4OQbLUY+lmmgZQrWoTpc5LDUyFXSFzJS2bU=
59466b13a3ae0e29a5d4f485393e516cfbb057d0 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmO1XgoZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVn8nDACU04KbPloLl+if6DQYreESnF9LU8C+qnLC/j5RRuaFNh/ec6C3DzLWqWdmnWA/siV3nUR1bXHfTui95azxJfYvWoXH2R2yam+YhE256B4rDDYWS1LI9kNNM+A33xcPS2HxVowkByhjB5FPKR6I90dX42BYJpTS5s/VPx63wXLznjFWuD7XJ3P0VI7D72j/+6EQCmHaAUEE5bO00Ob2JxmzJlaP+02fYc814PAONE2/ocfR0aExAVS3VA+SJGXnXTVpoaHr7NJKC2sBLFsdnhIRwtCf3rtGEvIJ5v2U2xx0ZEz/mimtGzW5ovkthobV4mojk0DRz7xBtA96pOGSRTD8QndIsdMCUipo8zZ/AGAMByCtsQOX7OYhR6gp+I6+iPh8fTR5oCbkO7cizDDQtXcrR5OT/BDH9xkAF1ghNL8o23a09/wfZ9NPg5zrh/4T/dFfoe2COlkAJJ1ttDPYyQkCfMsoWm3OXk6xJ3ExVbwkZzUDQSzsxGS+oxbFDWJZ64Q=
+8830004967ad865ead89c28a410405a6e71e0796 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmQAsOQZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVl7XC/0W+Wd4gzMUbaot+NVIZTpubNw3KHBDXrlMgwQgCDg7qcqJnVuT1NNEy5sRELjZOO0867k+pBchZaxdmAiFwY1W76+7nwiLBqfCkYgYY0iQe48JHTq9kCgohvx9PSEVbUsScmqAQImd5KzErjhsLj8D2FiFIrcMyqsCBq4ZPs0Ey7lVKu6q3z5eDjlrxUIr0up6yKvgBxhY0GxyTp6DGoinzlFMEadiJlsvlwO4C6UpzKiCGMeKNT5xHK/Hx3ChrOH2Yuu1fHaPLJ+ZpXjR33ileVYlkQrh1D6fWHXcP7ZuwsEKREtgsw1YjYczGFwmhBO362bNi5wy33mBtCvcIAqpsI0rMrExs66qqbfyG+Yp1dvkgzUfdhbYFHA+mvg3/YTSD9dLKzzsb69LM87+dvcLqhBJ0nEAuBmAzU5ECkoArbiwMT96NhhjLPRmJJdHNo0IDos/LBGTgkOZ6iqIx8Xm/tgjBjFJG8B+IVy3laNgun4AZ9Ejc3ahIfhJUIo2j8o=
--- a/.hgtags Thu Mar 02 04:16:47 2023 +0100
+++ b/.hgtags Thu Mar 02 19:02:52 2023 +0100
@@ -254,3 +254,4 @@
0000000000000000000000000000000000000000 6.3.0
c890d8b8bc59b18e5febf60caada629df5356ee2 6.3.1
59466b13a3ae0e29a5d4f485393e516cfbb057d0 6.3.2
+8830004967ad865ead89c28a410405a6e71e0796 6.3.3
--- a/hgext/patchbomb.py Thu Mar 02 04:16:47 2023 +0100
+++ b/hgext/patchbomb.py Thu Mar 02 19:02:52 2023 +0100
@@ -361,7 +361,12 @@
ui.warn(_(b'warning: working directory has uncommitted changes\n'))
output = stringio()
cmdutil.exportfile(
- repo, [r], output, opts=patch.difffeatureopts(ui, opts, git=True)
+ repo,
+ [r],
+ output,
+ opts=patch.difffeatureopts(
+ ui, pycompat.byteskwargs(opts), git=True
+ ),
)
yield output.getvalue().split(b'\n')
--- a/mercurial/configitems.py Thu Mar 02 04:16:47 2023 +0100
+++ b/mercurial/configitems.py Thu Mar 02 19:02:52 2023 +0100
@@ -652,6 +652,15 @@
b'deprec-warn',
default=False,
)
+# possible values:
+# - auto (the default)
+# - force-append
+# - force-new
+coreconfigitem(
+ b'devel',
+ b'dirstate.v2.data_update_mode',
+ default="auto",
+)
coreconfigitem(
b'devel',
b'disableloaddefaultcerts',
@@ -689,6 +698,42 @@
b'serverrequirecert',
default=False,
)
+# Makes the status algorithm wait for the existence of this file
+# (or until a timeout of `devel.sync.status.pre-dirstate-write-file-timeout`
+# seconds) before taking the lock and writing the dirstate.
+# Status signals that it's ready to wait by creating a file
+# with the same name + `.waiting`.
+# Useful when testing race conditions.
+coreconfigitem(
+ b'devel',
+ b'sync.status.pre-dirstate-write-file',
+ default=None,
+)
+coreconfigitem(
+ b'devel',
+ b'sync.status.pre-dirstate-write-file-timeout',
+ default=2,
+)
+coreconfigitem(
+ b'devel',
+ b'sync.dirstate.post-docket-read-file',
+ default=None,
+)
+coreconfigitem(
+ b'devel',
+ b'sync.dirstate.post-docket-read-file-timeout',
+ default=2,
+)
+coreconfigitem(
+ b'devel',
+ b'sync.dirstate.pre-read-file',
+ default=None,
+)
+coreconfigitem(
+ b'devel',
+ b'sync.dirstate.pre-read-file-timeout',
+ default=2,
+)
coreconfigitem(
b'devel',
b'strip-obsmarkers',
--- a/mercurial/context.py Thu Mar 02 04:16:47 2023 +0100
+++ b/mercurial/context.py Thu Mar 02 19:02:52 2023 +0100
@@ -36,6 +36,7 @@
sparse,
subrepo,
subrepoutil,
+ testing,
util,
)
from .utils import (
@@ -1854,6 +1855,7 @@
def _poststatusfixup(self, status, fixup):
"""update dirstate for files that are actually clean"""
+ testing.wait_on_cfg(self._repo.ui, b'status.pre-dirstate-write-file')
dirstate = self._repo.dirstate
poststatus = self._repo.postdsstatus()
if fixup:
--- a/mercurial/diffutil.py Thu Mar 02 04:16:47 2023 +0100
+++ b/mercurial/diffutil.py Thu Mar 02 19:02:52 2023 +0100
@@ -7,6 +7,13 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
+import typing
+
+from typing import (
+ Any,
+ Dict,
+ Optional,
+)
from .i18n import _
@@ -15,10 +22,20 @@
pycompat,
)
+if typing.TYPE_CHECKING:
+ from . import ui as uimod
+
+# TODO: narrow the value after the config module is typed
+_Opts = Dict[bytes, Any]
+
def diffallopts(
- ui, opts=None, untrusted=False, section=b'diff', configprefix=b''
-):
+ ui: "uimod.ui",
+ opts: Optional[_Opts] = None,
+ untrusted: bool = False,
+ section: bytes = b'diff',
+ configprefix: bytes = b'',
+) -> mdiff.diffopts:
'''return diffopts with all features supported and parsed'''
return difffeatureopts(
ui,
@@ -33,15 +50,15 @@
def difffeatureopts(
- ui,
- opts=None,
- untrusted=False,
- section=b'diff',
- git=False,
- whitespace=False,
- formatchanging=False,
- configprefix=b'',
-):
+ ui: "uimod.ui",
+ opts: Optional[_Opts] = None,
+ untrusted: bool = False,
+ section: bytes = b'diff',
+ git: bool = False,
+ whitespace: bool = False,
+ formatchanging: bool = False,
+ configprefix: bytes = b'',
+) -> mdiff.diffopts:
"""return diffopts with only opted-in features parsed
Features:
@@ -51,7 +68,12 @@
with most diff parsers
"""
- def get(key, name=None, getter=ui.configbool, forceplain=None):
+ def get(
+ key: bytes,
+ name: Optional[bytes] = None,
+ getter=ui.configbool,
+ forceplain: Optional[bool] = None,
+ ) -> Any:
if opts:
v = opts.get(key)
# diffopts flags are either None-default (which is passed
--- a/mercurial/dirstatemap.py Thu Mar 02 04:16:47 2023 +0100
+++ b/mercurial/dirstatemap.py Thu Mar 02 19:02:52 2023 +0100
@@ -10,6 +10,7 @@
error,
pathutil,
policy,
+ testing,
txnutil,
util,
)
@@ -31,6 +32,13 @@
rangemask = 0x7FFFFFFF
+WRITE_MODE_AUTO = 0
+WRITE_MODE_FORCE_NEW = 1
+WRITE_MODE_FORCE_APPEND = 2
+
+
+V2_MAX_READ_ATTEMPTS = 5
+
class _dirstatemapcommon:
"""
@@ -54,6 +62,16 @@
self._parents = None
self._dirtyparents = False
self._docket = None
+ write_mode = ui.config(b"devel", b"dirstate.v2.data_update_mode")
+ if write_mode == b"auto":
+ self._write_mode = WRITE_MODE_AUTO
+ elif write_mode == b"force-append":
+ self._write_mode = WRITE_MODE_FORCE_APPEND
+ elif write_mode == b"force-new":
+ self._write_mode = WRITE_MODE_FORCE_NEW
+ else:
+ # unknown value, fallback to default
+ self._write_mode = WRITE_MODE_AUTO
# for consistent view between _pl() and _read() invocations
self._pendingmode = None
@@ -132,11 +150,31 @@
raise error.ProgrammingError(
b'dirstate only has a docket in v2 format'
)
+ self._set_identity()
self._docket = docketmod.DirstateDocket.parse(
self._readdirstatefile(), self._nodeconstants
)
return self._docket
+ def _read_v2_data(self):
+ data = None
+ attempts = 0
+ while attempts < V2_MAX_READ_ATTEMPTS:
+ attempts += 1
+ try:
+ # TODO: use mmap when possible
+ data = self._opener.read(self.docket.data_filename())
+ except FileNotFoundError:
+ # read race detected between docket and data file
+ # reload the docket and retry
+ self._docket = None
+ if data is None:
+ assert attempts >= V2_MAX_READ_ATTEMPTS
+ msg = b"dirstate read race happened %d times in a row"
+ msg %= attempts
+ raise error.Abort(msg)
+ return self._opener.read(self.docket.data_filename())
+
def write_v2_no_append(self, tr, st, meta, packed):
old_docket = self.docket
new_docket = docketmod.DirstateDocket.with_new_uuid(
@@ -290,14 +328,15 @@
### disk interaction
def read(self):
- # ignore HG_PENDING because identity is used only for writing
- self._set_identity()
+ testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
+ if self._use_dirstate_v2:
- if self._use_dirstate_v2:
if not self.docket.uuid:
return
- st = self._opener.read(self.docket.data_filename())
+ testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
+ st = self._read_v2_data()
else:
+ self._set_identity()
st = self._readdirstatefile()
if not st:
@@ -556,19 +595,39 @@
# ignore HG_PENDING because identity is used only for writing
self._set_identity()
+ testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
if self._use_dirstate_v2:
- if self.docket.uuid:
- # TODO: use mmap when possible
- data = self._opener.read(self.docket.data_filename())
+ self.docket # load the data if needed
+ inode = (
+ self.identity.stat.st_ino
+ if self.identity is not None
+ and self.identity.stat is not None
+ else None
+ )
+ testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
+ if not self.docket.uuid:
+ data = b''
+ self._map = rustmod.DirstateMap.new_empty()
else:
- data = b''
- self._map = rustmod.DirstateMap.new_v2(
- data, self.docket.data_size, self.docket.tree_metadata
- )
+ data = self._read_v2_data()
+ self._map = rustmod.DirstateMap.new_v2(
+ data,
+ self.docket.data_size,
+ self.docket.tree_metadata,
+ self.docket.uuid,
+ inode,
+ )
parents = self.docket.parents
else:
+ self._set_identity()
+ inode = (
+ self.identity.stat.st_ino
+ if self.identity is not None
+ and self.identity.stat is not None
+ else None
+ )
self._map, parents = rustmod.DirstateMap.new_v1(
- self._readdirstatefile()
+ self._readdirstatefile(), inode
)
if parents and not self._dirtyparents:
@@ -638,8 +697,10 @@
return
# We can only append to an existing data file if there is one
- can_append = self.docket.uuid is not None
- packed, meta, append = self._map.write_v2(can_append)
+ write_mode = self._write_mode
+ if self.docket.uuid is None:
+ write_mode = WRITE_MODE_FORCE_NEW
+ packed, meta, append = self._map.write_v2(write_mode)
if append:
docket = self.docket
data_filename = docket.data_filename()
--- a/mercurial/testing/__init__.py Thu Mar 02 04:16:47 2023 +0100
+++ b/mercurial/testing/__init__.py Thu Mar 02 19:02:52 2023 +0100
@@ -9,6 +9,21 @@
environ = getattr(os, 'environ')
+def wait_on_cfg(ui, cfg, timeout=10):
+ """synchronize on the `cfg` config path
+
+ Use this to synchronize commands during race tests.
+ """
+ full_config = b'sync.' + cfg
+ wait_config = full_config + b'-timeout'
+ sync_path = ui.config(b'devel', full_config)
+ if sync_path is not None:
+ timeout = ui.config(b'devel', wait_config)
+ ready_path = sync_path + b'.waiting'
+ write_file(ready_path)
+ wait_file(sync_path, timeout=timeout)
+
+
def _timeout_factor():
"""return the current modification to timeout"""
default = int(environ.get('HGTEST_TIMEOUT_DEFAULT', 360))
--- a/relnotes/6.3 Thu Mar 02 04:16:47 2023 +0100
+++ b/relnotes/6.3 Thu Mar 02 19:02:52 2023 +0100
@@ -1,3 +1,51 @@
+= Mercurial 6.3.3 =
+
+ * tests: filter out PEP 657 error locations in tracebacks (issue6780)
+ * tests: optional PEP 657 error location in test-extension.t (issue6781)
+ * tests: optional PEP 657 error location in test-lfs-serve-access.t (issue6782)
+ * histedit: byteify the help for the multifold action
+ * sparse: fix a py2 based usage of `map()`
+ * convert: stop passing str to the dateutil API in darcs
+ * convert: turn the last str regex into bytes in cvs.py (issue6789)
+ * convert: change socket mode from b'r+' to 'rwb' in cvs.py (issue6789)
+ * convert: replace repr() by pycompat.byterepr() in cvsps.py (issue6789)
+ * tests: os module is frozen in Python 3.11 (issue6786)
+ * hgweb: unbyteify the 100-continue check
+ * resourceutil: start using importlib.resources.files() when possible
+ * revset: the `random` sort should not depend on sys.maxsize (issue6770)
+ * tests: make sure pygments can detect python script without extension
+ * convert: brz 3.3.0 moved NoSuchFile exception to breezy.transport
+ * tests: pygments 2.14+ highlight whitespace in python code
+ * hghave: make different has_pyoxidizer functions have different names
+ * hghave: refactor checks for pygments versions using checkvers()
+ * rust-narrow: fix loop that never loops
+ * scmutil: make checknewlabel() allow "_" in otherwise numeric names (issue6737)
+ * bundlerepo: enforce the requirements declared by the underlying repository
+ * setup: make the version computation process more resistant
+ * fix: add more information to the debug output
+ * tag: disallow tagging the working directory
+ * dirstate: handle missing backup file on restoration
+ * dirstate-v2: complain early on docket name collision
+ * rust: upgrade minimum `rayon` dependency
+ * setup: support building from an ongoing merge
+ * rust: add debug log about skipping dirstate update
+ * rust-dirstate: trace append/no append to help debugging
+ * dirstate-v2: don't mmap the data file when on NFS
+ * run-tests: make it possible to nest conditionals
+ * dirstate: add some debug output when writing the dirstate
+ * testing: introduce util function to synchronize concurrent commands on files
+ * dirstate: add a way to test races happening during status
+ * dirstate: use more than a bool to control append behavior
+ * dirstate-v2: add devel config option to control write behavior
+ * rhg: fix race when a fixup file is deleted on disk
+ * rhg: fix race when an ambiguous file is deleted on disk
+ * dirstate: deal with read-race for pure python code
+ * dirstate: deal with read-race for python code using rust object
+ * dirstate: deal with read-race for pure rust code path (rhg)
+ * dirstate: set identity whenever we read the dirstate's v2 docket
+ * rust-dirstate-v2: don't write dirstate if data file has changed
+ * rhg: remember the inode of .hg/dirstate
+
= Mercurial 6.3.2 =
* [ecfc84b956a8] tests: expect the message from 1baf0fffd82f in test-hghave.t (issue6762)
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Mar 02 19:02:52 2023 +0100
@@ -38,6 +38,13 @@
V2,
}
+#[derive(Debug, PartialEq, Eq)]
+pub enum DirstateMapWriteMode {
+ Auto,
+ ForceNewDataFile,
+ ForceAppend,
+}
+
#[derive(Debug)]
pub struct DirstateMap<'on_disk> {
/// Contents of the `.hg/dirstate` file
@@ -59,10 +66,28 @@
pub(super) unreachable_bytes: u32,
/// Size of the data used to first load this `DirstateMap`. Used in case
- /// we need to write some new metadata, but no new data on disk.
+ /// we need to write some new metadata, but no new data on disk,
+ /// as well as to detect writes that have happened in another process
+ /// since first read.
pub(super) old_data_size: usize,
+ /// UUID used when first loading this `DirstateMap`. Used to check if
+ /// the UUID has been changed by another process since first read.
+ /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
+ pub(super) old_uuid: Option<Vec<u8>>,
+
+ /// Identity of the dirstate file (for dirstate-v1) or the docket file
+ /// (v2). Used to detect if the file has changed from another process.
+ /// Since it's always written atomically, we can compare the inode to
+ /// check the file identity.
+ ///
+ /// TODO On non-Unix systems, something like hashing is a possibility?
+ pub(super) identity: Option<u64>,
+
pub(super) dirstate_version: DirstateVersion,
+
+ /// Controlled by config option `devel.dirstate.v2.data_update_mode`
+ pub(super) write_mode: DirstateMapWriteMode,
}
/// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -445,7 +470,10 @@
ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
unreachable_bytes: 0,
old_data_size: 0,
+ old_uuid: None,
+ identity: None,
dirstate_version: DirstateVersion::V1,
+ write_mode: DirstateMapWriteMode::Auto,
}
}
@@ -454,9 +482,11 @@
on_disk: &'on_disk [u8],
data_size: usize,
metadata: &[u8],
+ uuid: Vec<u8>,
+ identity: Option<u64>,
) -> Result<Self, DirstateError> {
if let Some(data) = on_disk.get(..data_size) {
- Ok(on_disk::read(data, metadata)?)
+ Ok(on_disk::read(data, metadata, uuid, identity)?)
} else {
Err(DirstateV2ParseError::new("not enough bytes on disk").into())
}
@@ -465,6 +495,7 @@
#[logging_timer::time("trace")]
pub fn new_v1(
on_disk: &'on_disk [u8],
+ identity: Option<u64>,
) -> Result<(Self, Option<DirstateParents>), DirstateError> {
let mut map = Self::empty(on_disk);
if map.on_disk.is_empty() {
@@ -506,6 +537,7 @@
},
)?;
let parents = Some(*parents);
+ map.identity = identity;
Ok((map, parents))
}
@@ -514,8 +546,15 @@
/// append to the existing data file that contains `self.on_disk` (true),
/// or create a new data file from scratch (false).
pub(super) fn write_should_append(&self) -> bool {
- let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
- ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
+ match self.write_mode {
+ DirstateMapWriteMode::ForceAppend => true,
+ DirstateMapWriteMode::ForceNewDataFile => false,
+ DirstateMapWriteMode::Auto => {
+ let ratio =
+ self.unreachable_bytes as f32 / self.on_disk.len() as f32;
+ ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
+ }
+ }
}
fn get_node<'tree>(
@@ -911,6 +950,10 @@
*unreachable_bytes += path.len() as u32
}
}
+
+ pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
+ self.write_mode = write_mode;
+ }
}
type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
@@ -1230,11 +1273,11 @@
#[logging_timer::time("trace")]
pub fn pack_v2(
&self,
- can_append: bool,
+ write_mode: DirstateMapWriteMode,
) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
{
let map = self.get_map();
- on_disk::write(map, can_append)
+ on_disk::write(map, write_mode)
}
/// `callback` allows the caller to process and do something with the
@@ -1794,7 +1837,7 @@
)?;
let (packed, metadata, _should_append, _old_data_size) =
- map.pack_v2(false)?;
+ map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
let packed_len = packed.len();
assert!(packed_len > 0);
@@ -1803,6 +1846,8 @@
packed,
packed_len,
metadata.as_bytes(),
+ vec![],
+ None,
)?;
// Check that everything is accounted for
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Mar 02 19:02:52 2023 +0100
@@ -4,7 +4,9 @@
use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
use crate::dirstate_tree::dirstate_map::DirstateVersion;
-use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
+use crate::dirstate_tree::dirstate_map::{
+ self, DirstateMap, DirstateMapWriteMode, NodeRef,
+};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
@@ -285,6 +287,8 @@
pub(super) fn read<'on_disk>(
on_disk: &'on_disk [u8],
metadata: &[u8],
+ uuid: Vec<u8>,
+ identity: Option<u64>,
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
if on_disk.is_empty() {
let mut map = DirstateMap::empty(on_disk);
@@ -307,7 +311,10 @@
ignore_patterns_hash: meta.ignore_patterns_hash,
unreachable_bytes: meta.unreachable_bytes.get(),
old_data_size: on_disk.len(),
+ old_uuid: Some(uuid),
+ identity,
dirstate_version: DirstateVersion::V2,
+ write_mode: DirstateMapWriteMode::Auto,
};
Ok(dirstate_map)
}
@@ -605,9 +612,18 @@
/// (false), and the previous size of data on disk.
pub(super) fn write(
dirstate_map: &DirstateMap,
- can_append: bool,
+ write_mode: DirstateMapWriteMode,
) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
- let append = can_append && dirstate_map.write_should_append();
+ let append = match write_mode {
+ DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
+ DirstateMapWriteMode::ForceNewDataFile => false,
+ DirstateMapWriteMode::ForceAppend => true,
+ };
+ if append {
+ log::trace!("appending to the dirstate data file");
+ } else {
+ log::trace!("creating new dirstate data file");
+ }
// This ignores the space for paths, and for nodes without an entry.
// TODO: better estimate? Skip the `Vec` and write to a file directly?
--- a/rust/hg-core/src/dirstate_tree/owning.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/owning.rs Thu Mar 02 19:02:52 2023 +0100
@@ -31,6 +31,7 @@
pub fn new_v1<OnDisk>(
on_disk: OnDisk,
+ identity: Option<u64>,
) -> Result<(Self, DirstateParents), DirstateError>
where
OnDisk: Deref<Target = [u8]> + Send + 'static,
@@ -42,7 +43,7 @@
OwningDirstateMapTryBuilder {
on_disk,
map_builder: |bytes| {
- DirstateMap::new_v1(bytes).map(|(dmap, p)| {
+ DirstateMap::new_v1(bytes, identity).map(|(dmap, p)| {
parents = p.unwrap_or(DirstateParents::NULL);
dmap
})
@@ -57,6 +58,8 @@
on_disk: OnDisk,
data_size: usize,
metadata: &[u8],
+ uuid: Vec<u8>,
+ identity: Option<u64>,
) -> Result<Self, DirstateError>
where
OnDisk: Deref<Target = [u8]> + Send + 'static,
@@ -66,7 +69,7 @@
OwningDirstateMapTryBuilder {
on_disk,
map_builder: |bytes| {
- DirstateMap::new_v2(bytes, data_size, metadata)
+ DirstateMap::new_v2(bytes, data_size, metadata, uuid, identity)
},
}
.try_build()
@@ -86,4 +89,16 @@
pub fn on_disk(&self) -> &[u8] {
self.borrow_on_disk()
}
+
+ pub fn old_uuid(&self) -> Option<&[u8]> {
+ self.get_map().old_uuid.as_deref()
+ }
+
+ pub fn old_identity(&self) -> Option<u64> {
+ self.get_map().identity
+ }
+
+ pub fn old_data_size(&self) -> usize {
+ self.get_map().old_data_size
+ }
}
--- a/rust/hg-core/src/errors.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/errors.rs Thu Mar 02 19:02:52 2023 +0100
@@ -46,6 +46,9 @@
/// Censored revision data.
CensoredNodeError,
+ /// A race condition has been detected. This *must* be handled locally
+ /// and not directly surface to the user.
+ RaceDetected(String),
}
/// Details about where an I/O error happened
@@ -111,6 +114,9 @@
write!(f, "encountered a censored node")
}
HgError::ConfigValueParseError(error) => error.fmt(f),
+ HgError::RaceDetected(context) => {
+ write!(f, "encountered a race condition {context}")
+ }
}
}
}
--- a/rust/hg-core/src/repo.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/repo.rs Thu Mar 02 19:02:52 2023 +0100
@@ -1,6 +1,7 @@
use crate::changelog::Changelog;
use crate::config::{Config, ConfigError, ConfigParseError};
use crate::dirstate::DirstateParents;
+use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
use crate::dirstate_tree::owning::OwningDirstateMap;
use crate::errors::HgResultExt;
@@ -9,6 +10,7 @@
use crate::manifest::{Manifest, Manifestlog};
use crate::revlog::filelog::Filelog;
use crate::revlog::RevlogError;
+use crate::utils::debug::debug_wait_for_file_or_print;
use crate::utils::files::get_path_from_bytes;
use crate::utils::hg_path::HgPath;
use crate::utils::SliceExt;
@@ -22,6 +24,10 @@
use std::io::Write as IoWrite;
use std::path::{Path, PathBuf};
+const V2_MAX_READ_ATTEMPTS: usize = 5;
+
+type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
+
/// A repository on disk
pub struct Repo {
working_directory: PathBuf,
@@ -30,7 +36,6 @@
requirements: HashSet<String>,
config: Config,
dirstate_parents: LazyCell<DirstateParents>,
- dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
dirstate_map: LazyCell<OwningDirstateMap>,
changelog: LazyCell<Changelog>,
manifestlog: LazyCell<Manifestlog>,
@@ -180,7 +185,6 @@
dot_hg,
config: repo_config,
dirstate_parents: LazyCell::new(),
- dirstate_data_file_uuid: LazyCell::new(),
dirstate_map: LazyCell::new(),
changelog: LazyCell::new(),
manifestlog: LazyCell::new(),
@@ -254,6 +258,15 @@
.unwrap_or_default())
}
+ fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
+ use std::os::unix::fs::MetadataExt;
+ Ok(self
+ .hg_vfs()
+ .symlink_metadata("dirstate")
+ .io_not_found_as_none()?
+ .map(|meta| meta.ino()))
+ }
+
pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
Ok(*self
.dirstate_parents
@@ -263,15 +276,10 @@
fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
let dirstate = self.dirstate_file_contents()?;
let parents = if dirstate.is_empty() {
- if self.has_dirstate_v2() {
- self.dirstate_data_file_uuid.set(None);
- }
DirstateParents::NULL
} else if self.has_dirstate_v2() {
let docket =
crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
- self.dirstate_data_file_uuid
- .set(Some(docket.uuid.to_owned()));
docket.parents()
} else {
*crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
@@ -280,57 +288,179 @@
Ok(parents)
}
- fn read_dirstate_data_file_uuid(
+ /// Returns the information read from the dirstate docket necessary to
+ /// check if the data file has been updated/deleted by another process
+ /// since we last read the dirstate.
+ /// Namely, the inode, data file uuid and the data size.
+ fn get_dirstate_data_file_integrity(
&self,
- ) -> Result<Option<Vec<u8>>, HgError> {
+ ) -> Result<DirstateMapIdentity, HgError> {
assert!(
self.has_dirstate_v2(),
"accessing dirstate data file ID without dirstate-v2"
);
+ // Get the identity before the contents since we could have a race
+ // between the two. Having an identity that is too old is fine, but
+ // one that is younger than the content change is bad.
+ let identity = self.dirstate_identity()?;
let dirstate = self.dirstate_file_contents()?;
if dirstate.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
- Ok(None)
+ Ok((identity, None, 0))
} else {
let docket =
crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
self.dirstate_parents.set(docket.parents());
- Ok(Some(docket.uuid.to_owned()))
+ Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
}
}
fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
+ if self.has_dirstate_v2() {
+ // The v2 dirstate is split into a docket and a data file.
+ // Since we don't always take the `wlock` to read it
+ // (like in `hg status`), it is susceptible to races.
+ // A simple retry method should be enough since full rewrites
+ // only happen when too much garbage data is present and
+ // this race is unlikely.
+ let mut tries = 0;
+
+ while tries < V2_MAX_READ_ATTEMPTS {
+ tries += 1;
+ match self.read_docket_and_data_file() {
+ Ok(m) => {
+ return Ok(m);
+ }
+ Err(e) => match e {
+ DirstateError::Common(HgError::RaceDetected(
+ context,
+ )) => {
+ log::info!(
+ "dirstate read race detected {} (retry {}/{})",
+ context,
+ tries,
+ V2_MAX_READ_ATTEMPTS,
+ );
+ continue;
+ }
+ _ => return Err(e),
+ },
+ }
+ }
+ let error = HgError::abort(
+ format!("dirstate read race happened {tries} times in a row"),
+ 255,
+ None,
+ );
+ Err(DirstateError::Common(error))
+ } else {
+ debug_wait_for_file_or_print(
+ self.config(),
+ "dirstate.pre-read-file",
+ );
+ let identity = self.dirstate_identity()?;
+ let dirstate_file_contents = self.dirstate_file_contents()?;
+ if dirstate_file_contents.is_empty() {
+ self.dirstate_parents.set(DirstateParents::NULL);
+ Ok(OwningDirstateMap::new_empty(Vec::new()))
+ } else {
+ let (map, parents) = OwningDirstateMap::new_v1(
+ dirstate_file_contents,
+ identity,
+ )?;
+ self.dirstate_parents.set(parents);
+ Ok(map)
+ }
+ }
+ }
+
+ fn read_docket_and_data_file(
+ &self,
+ ) -> Result<OwningDirstateMap, DirstateError> {
+ debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
let dirstate_file_contents = self.dirstate_file_contents()?;
+ let identity = self.dirstate_identity()?;
if dirstate_file_contents.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
- if self.has_dirstate_v2() {
- self.dirstate_data_file_uuid.set(None);
- }
- Ok(OwningDirstateMap::new_empty(Vec::new()))
- } else if self.has_dirstate_v2() {
- let docket = crate::dirstate_tree::on_disk::read_docket(
- &dirstate_file_contents,
- )?;
- self.dirstate_parents.set(docket.parents());
- self.dirstate_data_file_uuid
- .set(Some(docket.uuid.to_owned()));
- let data_size = docket.data_size();
- let metadata = docket.tree_metadata();
- if let Some(data_mmap) = self
+ return Ok(OwningDirstateMap::new_empty(Vec::new()));
+ }
+ let docket = crate::dirstate_tree::on_disk::read_docket(
+ &dirstate_file_contents,
+ )?;
+ debug_wait_for_file_or_print(
+ self.config(),
+ "dirstate.post-docket-read-file",
+ );
+ self.dirstate_parents.set(docket.parents());
+ let uuid = docket.uuid.to_owned();
+ let data_size = docket.data_size();
+
+ let context = "between reading dirstate docket and data file";
+ let race_error = HgError::RaceDetected(context.into());
+ let metadata = docket.tree_metadata();
+
+ let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
+ // Don't mmap on NFS to prevent `SIGBUS` error on deletion
+ let contents = self.hg_vfs().read(docket.data_filename());
+ let contents = match contents {
+ Ok(c) => c,
+ Err(HgError::IoError { error, context }) => {
+ match error.raw_os_error().expect("real os error") {
+ // 2 = ENOENT, No such file or directory
+ // 116 = ESTALE, Stale NFS file handle
+ //
+ // TODO match on `error.kind()` when
+ // `ErrorKind::StaleNetworkFileHandle` is stable.
+ 2 | 116 => {
+ // Race where the data file was deleted right after
+ // we read the docket, try again
+ return Err(race_error.into());
+ }
+ _ => {
+ return Err(
+ HgError::IoError { error, context }.into()
+ )
+ }
+ }
+ }
+ Err(e) => return Err(e.into()),
+ };
+ OwningDirstateMap::new_v2(
+ contents, data_size, metadata, uuid, identity,
+ )
+ } else {
+ match self
.hg_vfs()
.mmap_open(docket.data_filename())
- .io_not_found_as_none()?
+ .io_not_found_as_none()
{
- OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
- } else {
- OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
+ Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
+ data_mmap, data_size, metadata, uuid, identity,
+ ),
+ Ok(None) => {
+ // Race where the data file was deleted right after we
+ // read the docket, try again
+ return Err(race_error.into());
+ }
+ Err(e) => return Err(e.into()),
}
- } else {
- let (map, parents) =
- OwningDirstateMap::new_v1(dirstate_file_contents)?;
- self.dirstate_parents.set(parents);
- Ok(map)
- }
+ }?;
+
+ let write_mode_config = self
+ .config()
+ .get_str(b"devel", b"dirstate.v2.data_update_mode")
+ .unwrap_or(Some("auto"))
+ .unwrap_or("auto"); // don't bother for devel options
+ let write_mode = match write_mode_config {
+ "auto" => DirstateMapWriteMode::Auto,
+ "force-new" => DirstateMapWriteMode::ForceNewDataFile,
+ "force-append" => DirstateMapWriteMode::ForceAppend,
+ _ => DirstateMapWriteMode::Auto,
+ };
+
+ map.with_dmap_mut(|m| m.set_write_mode(write_mode));
+
+ Ok(map)
}
pub fn dirstate_map(
@@ -421,13 +551,37 @@
// itโs unset
let parents = self.dirstate_parents()?;
let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
- let uuid_opt = self
- .dirstate_data_file_uuid
- .get_or_init(|| self.read_dirstate_data_file_uuid())?;
- let uuid_opt = uuid_opt.as_ref();
- let can_append = uuid_opt.is_some();
+ let (identity, uuid, data_size) =
+ self.get_dirstate_data_file_integrity()?;
+ let identity_changed = identity != map.old_identity();
+ let uuid_changed = uuid.as_deref() != map.old_uuid();
+ let data_length_changed = data_size != map.old_data_size();
+
+ if identity_changed || uuid_changed || data_length_changed {
+ // If any of identity, uuid or length have changed since
+ // last disk read, don't write.
+ // This is fine because either we're in a command that doesn't
+ // write anything too important (like `hg status`), or we're in
+ // `hg add` and we're supposed to have taken the lock before
+ // reading anyway.
+ //
+ // TODO complain loudly if we've changed anything important
+ // without taking the lock.
+ // (see `hg help config.format.use-dirstate-tracked-hint`)
+ log::debug!(
+ "dirstate has changed since last read, not updating."
+ );
+ return Ok(());
+ }
+
+ let uuid_opt = map.old_uuid();
+ let write_mode = if uuid_opt.is_some() {
+ DirstateMapWriteMode::Auto
+ } else {
+ DirstateMapWriteMode::ForceNewDataFile
+ };
let (data, tree_metadata, append, old_data_size) =
- map.pack_v2(can_append)?;
+ map.pack_v2(write_mode)?;
// Reuse the uuid, or generate a new one, keeping the old for
// deletion.
@@ -470,7 +624,10 @@
// https://github.com/openzfs/zfs/issues/13370
//
if !append {
+ log::trace!("creating a new dirstate data file");
options.create_new(true);
+ } else {
+ log::trace!("appending to the dirstate data file");
}
let data_size = (|| {
@@ -499,6 +656,22 @@
(packed_dirstate, old_uuid)
} else {
+ let identity = self.dirstate_identity()?;
+ if identity != map.old_identity() {
+ // If identity changed since last disk read, don't write.
+ // This is fine because either we're in a command that doesn't
+ // write anything too important (like `hg status`), or we're in
+ // `hg add` and we're supposed to have taken the lock before
+ // reading anyway.
+ //
+ // TODO complain loudly if we've changed anything important
+ // without taking the lock.
+ // (see `hg help config.format.use-dirstate-tracked-hint`)
+ log::debug!(
+ "dirstate has changed since last read, not updating."
+ );
+ return Ok(());
+ }
(map.pack_v1(parents)?, None)
};
--- a/rust/hg-core/src/utils.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/utils.rs Thu Mar 02 19:02:52 2023 +0100
@@ -15,6 +15,7 @@
use std::fmt;
use std::{io::Write, ops::Deref};
+pub mod debug;
pub mod files;
pub mod hg_path;
pub mod path_auditor;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/utils/debug.rs Thu Mar 02 19:02:52 2023 +0100
@@ -0,0 +1,87 @@
+//! Utils for debugging hg-core
+
+use crate::config::Config;
+
+/// Write the file path given by the config option `devel.<config_option>` with
+/// the suffix `.waiting`, then wait for the file path given by the
+/// config option `devel.<config_option>` to appear on disk
+/// up to `devel.<config_option>-timeout` seconds.
+/// Note that the timeout may be higher because we scale it if global
+/// `run-tests` timeouts are raised to prevent flakiness on slower hardware.
+///
+/// Useful for testing race conditions.
+pub fn debug_wait_for_file(
+ config: &Config,
+ config_option: &str,
+) -> Result<(), String> {
+ let path_opt = format!("sync.{config_option}");
+ let file_path = match config.get_str(b"devel", path_opt.as_bytes()).ok() {
+ Some(Some(file_path)) => file_path,
+ _ => return Ok(()),
+ };
+
+ // TODO make it so `configitems` is shared between Rust and Python so that
+ // defaults work out of the box, etc.
+ let default_timeout = 2;
+ let timeout_opt = format!("sync.{config_option}-timeout");
+ let timeout_seconds =
+ match config.get_u32(b"devel", timeout_opt.as_bytes()) {
+ Ok(Some(timeout)) => timeout,
+ Err(e) => {
+ log::debug!("{e}");
+ default_timeout
+ }
+ _ => default_timeout,
+ };
+ let timeout_seconds = timeout_seconds as u64;
+
+ log::debug!(
+ "Config option `{config_option}` found, \
+ waiting for file `{file_path}` to be created"
+ );
+ std::fs::File::create(format!("{file_path}.waiting")).ok();
+ // If the test timeout have been extended, scale the timer relative
+ // to the normal timing.
+ let global_default_timeout: u64 = std::env::var("HGTEST_TIMEOUT_DEFAULT")
+ .map(|t| t.parse())
+ .unwrap_or(Ok(0))
+ .unwrap();
+ let global_timeout_override: u64 = std::env::var("HGTEST_TIMEOUT")
+ .map(|t| t.parse())
+ .unwrap_or(Ok(0))
+ .unwrap();
+ let timeout_seconds = if global_default_timeout < global_timeout_override {
+ timeout_seconds * global_timeout_override / global_default_timeout
+ } else {
+ timeout_seconds
+ };
+ let timeout = std::time::Duration::from_secs(timeout_seconds);
+
+ let start = std::time::Instant::now();
+ let path = std::path::Path::new(file_path);
+ let mut found = false;
+ while start.elapsed() < timeout {
+ if path.exists() {
+ log::debug!("File `{file_path}` was created");
+ found = true;
+ break;
+ } else {
+ std::thread::sleep(std::time::Duration::from_millis(10));
+ }
+ }
+ if !found {
+ let msg = format!(
+ "File `{file_path}` set by `{config_option}` was not found \
+ within the allocated {timeout_seconds} seconds timeout"
+ );
+ Err(msg)
+ } else {
+ Ok(())
+ }
+}
+
+pub fn debug_wait_for_file_or_print(config: &Config, config_option: &str) {
+ if let Err(e) = debug_wait_for_file(config, config_option) {
+ eprintln!("{e}");
+ };
+}
--- a/rust/hg-core/src/vfs.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/vfs.rs Thu Mar 02 19:02:52 2023 +0100
@@ -172,3 +172,24 @@
pub(crate) fn is_file(path: impl AsRef<Path>) -> Result<bool, HgError> {
Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_file()))
}
+
+/// Returns whether the given `path` is on a network file system.
+/// Taken from `cargo`'s codebase.
+#[cfg(target_os = "linux")]
+pub(crate) fn is_on_nfs_mount(path: impl AsRef<Path>) -> bool {
+ use std::ffi::CString;
+ use std::mem;
+ use std::os::unix::prelude::*;
+
+ let path = match CString::new(path.as_ref().as_os_str().as_bytes()) {
+ Ok(path) => path,
+ Err(_) => return false,
+ };
+
+ unsafe {
+ let mut buf: libc::statfs = mem::zeroed();
+ let r = libc::statfs(path.as_ptr(), &mut buf);
+
+ r == 0 && buf.f_type as u32 == libc::NFS_SUPER_MAGIC as u32
+ }
+}
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Mar 02 19:02:52 2023 +0100
@@ -22,7 +22,8 @@
pybytes_deref::PyBytesDeref,
};
use hg::{
- dirstate::StateMapIter, dirstate_tree::on_disk::DirstateV2ParseError,
+ dirstate::StateMapIter, dirstate_tree::dirstate_map::DirstateMapWriteMode,
+ dirstate_tree::on_disk::DirstateV2ParseError,
dirstate_tree::owning::OwningDirstateMap, revlog::Node,
utils::files::normalize_case, utils::hg_path::HgPath, DirstateEntry,
DirstateError, DirstateParents,
@@ -47,9 +48,10 @@
@staticmethod
def new_v1(
on_disk: PyBytes,
+ identity: Option<u64>,
) -> PyResult<PyObject> {
let on_disk = PyBytesDeref::new(py, on_disk);
- let (map, parents) = OwningDirstateMap::new_v1(on_disk)
+ let (map, parents) = OwningDirstateMap::new_v1(on_disk, identity)
.map_err(|e| dirstate_error(py, e))?;
let map = Self::create_instance(py, map)?;
let p1 = PyBytes::new(py, parents.p1.as_bytes());
@@ -64,18 +66,33 @@
on_disk: PyBytes,
data_size: usize,
tree_metadata: PyBytes,
+ uuid: PyBytes,
+ identity: Option<u64>,
) -> PyResult<PyObject> {
let dirstate_error = |e: DirstateError| {
PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
};
let on_disk = PyBytesDeref::new(py, on_disk);
+ let uuid = uuid.data(py);
let map = OwningDirstateMap::new_v2(
- on_disk, data_size, tree_metadata.data(py),
+ on_disk,
+ data_size,
+ tree_metadata.data(py),
+ uuid.to_owned(),
+ identity,
).map_err(dirstate_error)?;
let map = Self::create_instance(py, map)?;
Ok(map.into_object())
}
+ /// Returns an empty DirstateMap. Only used for a new dirstate.
+ @staticmethod
+ def new_empty() -> PyResult<PyObject> {
+ let map = OwningDirstateMap::new_empty(vec![]);
+ let map = Self::create_instance(py, map)?;
+ Ok(map.into_object())
+ }
+
def clear(&self) -> PyResult<PyObject> {
self.inner(py).borrow_mut().clear();
Ok(py.None())
@@ -236,10 +253,16 @@
/// instead of written to a new data file (False).
def write_v2(
&self,
- can_append: bool,
+ write_mode: usize,
) -> PyResult<PyObject> {
let inner = self.inner(py).borrow();
- let result = inner.pack_v2(can_append);
+ let rust_write_mode = match write_mode {
+ 0 => DirstateMapWriteMode::Auto,
+ 1 => DirstateMapWriteMode::ForceNewDataFile,
+ 2 => DirstateMapWriteMode::ForceAppend,
+ _ => DirstateMapWriteMode::Auto, // XXX should we error out?
+ };
+ let result = inner.pack_v2(rust_write_mode);
match result {
Ok((packed, tree_metadata, append, _old_data_size)) => {
let packed = PyBytes::new(py, &packed);
--- a/rust/rhg/src/commands/status.rs Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/rhg/src/commands/status.rs Thu Mar 02 19:02:52 2023 +0100
@@ -21,6 +21,7 @@
use hg::manifest::Manifest;
use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
use hg::repo::Repo;
+use hg::utils::debug::debug_wait_for_file;
use hg::utils::files::get_bytes_from_os_string;
use hg::utils::files::get_path_from_bytes;
use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
@@ -308,26 +309,46 @@
.unsure
.into_par_iter()
.map(|to_check| {
- unsure_is_modified(
+ // The compiler seems to get a bit confused with complex
+ // inference when using a parallel iterator + map
+ // + map_err + collect, so let's just inline some of the
+ // logic.
+ match unsure_is_modified(
working_directory_vfs,
store_vfs,
check_exec,
&manifest,
&to_check.path,
- )
- .map(|modified| (to_check, modified))
+ ) {
+ Err(HgError::IoError { .. }) => {
+ // IO errors most likely stem from the file being
+ // deleted even though we know it's in the
+ // dirstate.
+ Ok((to_check, UnsureOutcome::Deleted))
+ }
+ Ok(outcome) => Ok((to_check, outcome)),
+ Err(e) => Err(e),
+ }
})
.collect::<Result<_, _>>()?;
- for (status_path, is_modified) in res.into_iter() {
- if is_modified {
- if display_states.modified {
- ds_status.modified.push(status_path);
+ for (status_path, outcome) in res.into_iter() {
+ match outcome {
+ UnsureOutcome::Clean => {
+ if display_states.clean {
+ ds_status.clean.push(status_path.clone());
+ }
+ fixup.push(status_path.path.into_owned())
}
- } else {
- if display_states.clean {
- ds_status.clean.push(status_path.clone());
+ UnsureOutcome::Modified => {
+ if display_states.modified {
+ ds_status.modified.push(status_path);
+ }
}
- fixup.push(status_path.path.into_owned())
+ UnsureOutcome::Deleted => {
+ if display_states.deleted {
+ ds_status.deleted.push(status_path);
+ }
+ }
}
}
}
@@ -401,6 +422,13 @@
after_status,
)?;
+ // Development config option to test write races
+ if let Err(e) =
+ debug_wait_for_file(config, "status.pre-dirstate-write-file")
+ {
+ ui.write_stderr(e.as_bytes()).ok();
+ }
+
if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
&& !dirstate_write_needed
{
@@ -420,9 +448,21 @@
// `unsure_is_clean` which was needed before reading
// contents. Here we access metadata again after reading
// content, in case it changed in the meantime.
- let fs_metadata = repo
+ let metadata_res = repo
.working_directory_vfs()
- .symlink_metadata(&fs_path)?;
+ .symlink_metadata(&fs_path);
+ let fs_metadata = match metadata_res {
+ Ok(meta) => meta,
+ Err(err) => match err {
+ HgError::IoError { .. } => {
+ // The file has probably been deleted. In any
+ // case, it was in the dirstate before, so
+ // let's ignore the error.
+ continue;
+ }
+ _ => return Err(err.into()),
+ },
+ };
if let Some(mtime) =
TruncatedTimestamp::for_reliable_mtime_of(
&fs_metadata,
@@ -449,6 +489,7 @@
// Not updating the dirstate is not ideal but not critical:
// donโt keep our caller waiting until some other Mercurial
// process releases the lock.
+ log::info!("not writing dirstate from `status`: lock is held")
}
Err(LockError::Other(HgError::IoError { error, .. }))
if error.kind() == io::ErrorKind::PermissionDenied =>
@@ -528,6 +569,16 @@
}
}
+/// Outcome of the additional check for an ambiguous tracked file
+enum UnsureOutcome {
+ /// The file is actually clean
+ Clean,
+ /// The file has been modified
+ Modified,
+ /// The file was deleted on disk (or became another type of fs entry)
+ Deleted,
+}
+
/// Check if a file is modified by comparing actual repo store and file system.
///
/// This meant to be used for those that the dirstate cannot resolve, due
@@ -538,7 +589,7 @@
check_exec: bool,
manifest: &Manifest,
hg_path: &HgPath,
-) -> Result<bool, HgError> {
+) -> Result<UnsureOutcome, HgError> {
let vfs = working_directory_vfs;
let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
let fs_metadata = vfs.symlink_metadata(&fs_path)?;
@@ -567,7 +618,7 @@
};
if entry_flags != fs_flags {
- return Ok(true);
+ return Ok(UnsureOutcome::Modified);
}
let filelog = hg::filelog::Filelog::open_vfs(&store_vfs, hg_path)?;
let fs_len = fs_metadata.len();
@@ -581,7 +632,7 @@
if filelog_entry.file_data_len_not_equal_to(fs_len) {
// No need to read file contents:
// it cannot be equal if it has a different length.
- return Ok(true);
+ return Ok(UnsureOutcome::Modified);
}
let p1_filelog_data = filelog_entry.data()?;
@@ -589,7 +640,7 @@
if p1_contents.len() as u64 != fs_len {
// No need to read file contents:
// it cannot be equal if it has a different length.
- return Ok(true);
+ return Ok(UnsureOutcome::Modified);
}
let fs_contents = if is_symlink {
@@ -597,5 +648,10 @@
} else {
vfs.read(fs_path)?
};
- Ok(p1_contents != &*fs_contents)
+
+ Ok(if p1_contents != &*fs_contents {
+ UnsureOutcome::Modified
+ } else {
+ UnsureOutcome::Clean
+ })
}
--- a/tests/run-tests.py Thu Mar 02 04:16:47 2023 +0100
+++ b/tests/run-tests.py Thu Mar 02 19:02:52 2023 +0100
@@ -1831,8 +1831,44 @@
pos = prepos = -1
- # True or False when in a true or false conditional section
- skipping = None
+ # The current stack of conditionnal section.
+ # Each relevant conditionnal section can have the following value:
+ # - True: we should run this block
+ # - False: we should skip this block
+ # - None: The parent block is skipped,
+ # (no branch of this one will ever run)
+ condition_stack = []
+
+ def run_line():
+ """return True if the current line should be run"""
+ if not condition_stack:
+ return True
+ return bool(condition_stack[-1])
+
+ def push_conditional_block(should_run):
+ """Push a new conditional context, with its initial state
+
+ i.e. entry a #if block"""
+ if not run_line():
+ condition_stack.append(None)
+ else:
+ condition_stack.append(should_run)
+
+ def flip_conditional():
+ """reverse the current condition state
+
+ i.e. enter a #else
+ """
+ assert condition_stack
+ if condition_stack[-1] is not None:
+ condition_stack[-1] = not condition_stack[-1]
+
+ def pop_conditional():
+ """exit the current skipping context
+
+ i.e. reach the #endif"""
+ assert condition_stack
+ condition_stack.pop()
# We keep track of whether or not we're in a Python block so we
# can generate the surrounding doctest magic.
@@ -1888,7 +1924,7 @@
after.setdefault(pos, []).append(
b' !!! invalid #require\n'
)
- if not skipping:
+ if run_line():
haveresult, message = self._hghave(lsplit[1:])
if not haveresult:
script = [b'echo "%s"\nexit 80\n' % message]
@@ -1898,21 +1934,19 @@
lsplit = l.split()
if len(lsplit) < 2 or lsplit[0] != b'#if':
after.setdefault(pos, []).append(b' !!! invalid #if\n')
- if skipping is not None:
- after.setdefault(pos, []).append(b' !!! nested #if\n')
- skipping = not self._iftest(lsplit[1:])
+ push_conditional_block(self._iftest(lsplit[1:]))
after.setdefault(pos, []).append(l)
elif l.startswith(b'#else'):
- if skipping is None:
+ if not condition_stack:
after.setdefault(pos, []).append(b' !!! missing #if\n')
- skipping = not skipping
+ flip_conditional()
after.setdefault(pos, []).append(l)
elif l.startswith(b'#endif'):
- if skipping is None:
+ if not condition_stack:
after.setdefault(pos, []).append(b' !!! missing #if\n')
- skipping = None
+ pop_conditional()
after.setdefault(pos, []).append(l)
- elif skipping:
+ elif not run_line():
after.setdefault(pos, []).append(l)
elif l.startswith(b' >>> '): # python inlines
after.setdefault(pos, []).append(l)
@@ -1957,7 +1991,7 @@
if inpython:
script.append(b'EOF\n')
- if skipping is not None:
+ if condition_stack:
after.setdefault(pos, []).append(b' !!! missing #endif\n')
addsalt(n + 1, False)
# Need to end any current per-command trace
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-dirstate-read-race.t Thu Mar 02 19:02:52 2023 +0100
@@ -0,0 +1,410 @@
+==============================================================================
+Check potential race conditions between a dirstate's read and other operations
+==============================================================================
+
+#testcases dirstate-v1 dirstate-v2-append dirstate-v2-rewrite
+#testcases pre-all-read pre-some-read
+
+Some commands, like `hg status`, do not need to take the wlock but need to
+access dirstate data.
+Other commands might update the dirstate data while this happens.
+
+This can create issues if repository data is read in the wrong order, or for
+the dirstate-v2 format where the data is contained in multiple files.
+
+This test file is meant to test various cases where such parallel operations
+happen and make sure the reading process behaves fine. We do so with a `hg
+status` command since it is probably the most advanced of such read-only
+command.
+
+It bears simililarity with `tests/test-dirstate-status-race.t ` but tests a
+different type of race.
+
+Setup
+=====
+
+ $ cat >> $HGRCPATH << EOF
+ > [storage]
+ > dirstate-v2.slow-path=allow
+ > EOF
+
+#if no-dirstate-v1
+ $ cat >> $HGRCPATH << EOF
+ > [format]
+ > use-dirstate-v2=yes
+ > EOF
+#else
+ $ cat >> $HGRCPATH << EOF
+ > [format]
+ > use-dirstate-v2=no
+ > EOF
+#endif
+
+#if dirstate-v2-rewrite
+ $ d2args="--config devel.dirstate.v2.data_update_mode=force-new"
+#endif
+#if dirstate-v2-append
+ $ d2args="--config devel.dirstate.v2.data_update_mode=force-append"
+#endif
+
+
+#if dirstate-v1
+ $ cfg="devel.sync.dirstate.pre-read-file"
+#else
+#if pre-all-read
+ $ cfg="devel.sync.dirstate.pre-read-file"
+#else
+ $ cfg="devel.sync.dirstate.post-docket-read-file"
+#endif
+#endif
+
+ $ directories="dir dir/nested dir2"
+ $ first_files="dir/nested/a dir/b dir/c dir/d dir2/e f"
+ $ second_files="g dir/nested/h dir/i dir/j dir2/k dir2/l dir/nested/m"
+ $ extra_files="dir/n dir/o p q"
+
+ $ hg init reference-repo
+ $ cd reference-repo
+ $ mkdir -p dir/nested dir2
+ $ touch -t 200001010000 $first_files $directories
+ $ hg commit -Aqm "recreate a bunch of files to facilitate dirstate-v2 append"
+ $ touch -t 200001010010 $second_files $directories
+ $ hg commit -Aqm "more files to have two commit"
+ $ hg log -G -v
+ @ changeset: 1:9a86dcbfb938
+ | tag: tip
+ | user: test
+ | date: Thu Jan 01 00:00:00 1970 +0000
+ | files: dir/i dir/j dir/nested/h dir/nested/m dir2/k dir2/l g
+ | description:
+ | more files to have two commit
+ |
+ |
+ o changeset: 0:4f23db756b09
+ user: test
+ date: Thu Jan 01 00:00:00 1970 +0000
+ files: dir/b dir/c dir/d dir/nested/a dir2/e f
+ description:
+ recreate a bunch of files to facilitate dirstate-v2 append
+
+
+ $ hg manifest
+ dir/b
+ dir/c
+ dir/d
+ dir/i
+ dir/j
+ dir/nested/a
+ dir/nested/h
+ dir/nested/m
+ dir2/e
+ dir2/k
+ dir2/l
+ f
+ g
+
+Add some unknown files and refresh the dirstate
+
+ $ touch -t 200001010020 $extra_files
+ $ hg add dir/o
+ $ hg remove dir/nested/m
+
+ $ hg st --config devel.dirstate.v2.data_update_mode=force-new
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ hg debugstate
+ n 644 0 2000-01-01 00:00:00 dir/b
+ n 644 0 2000-01-01 00:00:00 dir/c
+ n 644 0 2000-01-01 00:00:00 dir/d
+ n 644 0 2000-01-01 00:10:00 dir/i
+ n 644 0 2000-01-01 00:10:00 dir/j
+ n 644 0 2000-01-01 00:00:00 dir/nested/a
+ n 644 0 2000-01-01 00:10:00 dir/nested/h
+ r ?????????????????????????????????? dir/nested/m (glob)
+ a ?????????????????????????????????? dir/o (glob)
+ n 644 0 2000-01-01 00:00:00 dir2/e
+ n 644 0 2000-01-01 00:10:00 dir2/k
+ n 644 0 2000-01-01 00:10:00 dir2/l
+ n 644 0 2000-01-01 00:00:00 f
+ n 644 0 2000-01-01 00:10:00 g
+ $ hg debugstate > ../reference
+ $ cd ..
+
+Actual Testing
+==============
+
+Race with a `hg add`
+-------------------
+
+ $ cp -a reference-repo race-with-add
+ $ cd race-with-add
+
+spin a `hg status` with some caches to update
+
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config ${cfg}=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+Add a file
+
+ $ hg $d2args add dir/n
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+The file should in a "added" state
+
+ $ hg status
+ A dir/n
+ A dir/o
+ R dir/nested/m
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+#if dirstate-v1
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/n
+ A dir/o
+ R dir/nested/m
+ ? p
+ ? q
+#else
+#if rhg pre-some-read dirstate-v2-append
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+#else
+#if rust no-rhg dirstate-v2-append
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+#else
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/n
+ A dir/o
+ R dir/nested/m
+ ? p
+ ? q
+#endif
+#endif
+#endif
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with a `hg commit`
+-----------------------
+
+ $ cp -a reference-repo race-with-commit
+ $ cd race-with-commit
+
+spin a `hg status with some cache to update
+
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config ${cfg}=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+Add a do a commit
+
+ $ hg status
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ hg $d2args commit -m 'racing commit'
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+commit was created, and status is now clean
+
+ $ hg log -GT '{node|short} {desc}\n'
+ @ 02a67a77ee9b racing commit
+ |
+ o 9a86dcbfb938 more files to have two commit
+ |
+ o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append
+
+ $ hg status
+ ? dir/n
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+#if rust no-rhg dirstate-v2-append
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+#else
+#if rhg pre-some-read dirstate-v2-append
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+#else
+ $ cat $TESTTMP/status-race-lock.out
+ M dir/o (no-rhg known-bad-output !)
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+ warning: ignoring unknown working parent 02a67a77ee9b! (no-rhg !)
+#endif
+#endif
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with a `hg update`
+-----------------------
+
+ $ cp -a reference-repo race-with-update
+ $ cd race-with-update
+
+spin a `hg status` with some caches to update
+
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config ${cfg}=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+do an update
+
+ $ hg status
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ hg log -GT '{node|short} {desc}\n'
+ @ 9a86dcbfb938 more files to have two commit
+ |
+ o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append
+
+ $ hg $d2args update --merge ".~1"
+ 0 files updated, 0 files merged, 6 files removed, 0 files unresolved
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+ $ hg log -GT '{node|short} {desc}\n'
+ o 9a86dcbfb938 more files to have two commit
+ |
+ @ 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append
+
+ $ hg status
+ A dir/o
+ ? dir/n
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+#if rhg dirstate-v2-append pre-some-read
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ! dir/i
+ ! dir/j
+ ! dir/nested/h
+ ! dir2/k
+ ! dir2/l
+ ! g
+ ? dir/n
+ ? p
+ ? q
+#else
+#if rust no-rhg dirstate-v2-append
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ! dir/i
+ ! dir/j
+ ! dir/nested/h
+ ! dir2/k
+ ! dir2/l
+ ! g
+ ? dir/n
+ ? p
+ ? q
+#else
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ ? dir/n
+ ? p
+ ? q
+#endif
+#endif
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with a cache updating `hg status`
+--------------------------------------
+
+It is interesting to race with "read-only" operation (that still update its cache)
+
+ $ cp -a reference-repo race-with-status
+ $ cd race-with-status
+
+spin a `hg status` with some caches to update
+
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config ${cfg}=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+do an update
+
+ $ touch -t 200001020006 f
+ $ hg $d2args status
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+The status process should return a consistent result and not crash.
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-dirstate-status-write-race.t Thu Mar 02 19:02:52 2023 +0100
@@ -0,0 +1,460 @@
+=====================================================================
+Check potential race conditions between a status and other operations
+=====================================================================
+
+#testcases dirstate-v1 dirstate-v2-append dirstate-v2-rewrite
+
+The `hg status` command can run without the wlock, however it might end up
+having to update the on-disk dirstate files, for example to mark ambiguous
+files as clean, or to update directory caches information with dirstate-v2.
+
+
+If another process updates the dirstate in the meantime we might run into
+trouble. Especially, commands doing semantic changes like `hg add` or
+ `hg commit` should not see their update erased by a concurrent status.
+
+Unlike commands like `add` or `commit`, `status` only writes the dirstate
+to update caches, no actual information is lost if we fail to write to disk.
+
+
+This test file is meant to test various cases where such parallel operations
+between a status with reasons to update the dirstate and another semantic
+changes happen.
+
+
+Setup
+=====
+
+ $ cat >> $HGRCPATH << EOF
+ > [storage]
+ > dirstate-v2.slow-path=allow
+ > EOF
+
+#if no-dirstate-v1
+ $ cat >> $HGRCPATH << EOF
+ > [format]
+ > use-dirstate-v2=yes
+ > EOF
+#else
+ $ cat >> $HGRCPATH << EOF
+ > [format]
+ > use-dirstate-v2=no
+ > EOF
+#endif
+
+#if dirstate-v2-rewrite
+ $ d2args="--config devel.dirstate.v2.data_update_mode=force-new"
+#endif
+#if dirstate-v2-append
+ $ d2args="--config devel.dirstate.v2.data_update_mode=force-append"
+#endif
+
+ $ directories="dir dir/nested dir2"
+ $ first_files="dir/nested/a dir/b dir/c dir/d dir2/e f"
+ $ second_files="g dir/nested/h dir/i dir/j dir2/k dir2/l dir/nested/m"
+ $ extra_files="dir/n dir/o p q"
+
+ $ hg init reference-repo
+ $ cd reference-repo
+ $ mkdir -p dir/nested dir2
+ $ touch -t 200001010000 $first_files $directories
+ $ hg commit -Aqm "recreate a bunch of files to facilitate dirstate-v2 append"
+ $ touch -t 200001010010 $second_files $directories
+ $ hg commit -Aqm "more files to have two commits"
+ $ hg log -G -v
+ @ changeset: 1:c349430a1631
+ | tag: tip
+ | user: test
+ | date: Thu Jan 01 00:00:00 1970 +0000
+ | files: dir/i dir/j dir/nested/h dir/nested/m dir2/k dir2/l g
+ | description:
+ | more files to have two commits
+ |
+ |
+ o changeset: 0:4f23db756b09
+ user: test
+ date: Thu Jan 01 00:00:00 1970 +0000
+ files: dir/b dir/c dir/d dir/nested/a dir2/e f
+ description:
+ recreate a bunch of files to facilitate dirstate-v2 append
+
+
+ $ hg manifest
+ dir/b
+ dir/c
+ dir/d
+ dir/i
+ dir/j
+ dir/nested/a
+ dir/nested/h
+ dir/nested/m
+ dir2/e
+ dir2/k
+ dir2/l
+ f
+ g
+
+Add some unknown files and refresh the dirstate
+
+ $ touch -t 200001010020 $extra_files
+ $ hg add dir/o
+ $ hg remove dir/nested/m
+
+ $ hg st --config devel.dirstate.v2.data_update_mode=force-new
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ hg debugstate
+ n 644 0 2000-01-01 00:00:00 dir/b
+ n 644 0 2000-01-01 00:00:00 dir/c
+ n 644 0 2000-01-01 00:00:00 dir/d
+ n 644 0 2000-01-01 00:10:00 dir/i
+ n 644 0 2000-01-01 00:10:00 dir/j
+ n 644 0 2000-01-01 00:00:00 dir/nested/a
+ n 644 0 2000-01-01 00:10:00 dir/nested/h
+ r ?????????????????????????????????? dir/nested/m (glob)
+ a ?????????????????????????????????? dir/o (glob)
+ n 644 0 2000-01-01 00:00:00 dir2/e
+ n 644 0 2000-01-01 00:10:00 dir2/k
+ n 644 0 2000-01-01 00:10:00 dir2/l
+ n 644 0 2000-01-01 00:00:00 f
+ n 644 0 2000-01-01 00:10:00 g
+ $ hg debugstate > ../reference
+ $ cd ..
+
+Explain / verify the test principles
+------------------------------------
+
+First, we can properly copy the reference
+
+ $ cp -a reference-repo sanity-check
+ $ cd sanity-check
+ $ hg debugstate
+ n 644 0 2000-01-01 00:00:00 dir/b
+ n 644 0 2000-01-01 00:00:00 dir/c
+ n 644 0 2000-01-01 00:00:00 dir/d
+ n 644 0 2000-01-01 00:10:00 dir/i
+ n 644 0 2000-01-01 00:10:00 dir/j
+ n 644 0 2000-01-01 00:00:00 dir/nested/a
+ n 644 0 2000-01-01 00:10:00 dir/nested/h
+ r ?????????????????????????????????? dir/nested/m (glob)
+ a ?????????????????????????????????? dir/o (glob)
+ n 644 0 2000-01-01 00:00:00 dir2/e
+ n 644 0 2000-01-01 00:10:00 dir2/k
+ n 644 0 2000-01-01 00:10:00 dir2/l
+ n 644 0 2000-01-01 00:00:00 f
+ n 644 0 2000-01-01 00:10:00 g
+ $ hg debugstate > ../post-copy
+ $ diff ../reference ../post-copy
+
+And status thinks the cache is in a proper state
+
+ $ hg st
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ hg debugstate
+ n 644 0 2000-01-01 00:00:00 dir/b
+ n 644 0 2000-01-01 00:00:00 dir/c
+ n 644 0 2000-01-01 00:00:00 dir/d
+ n 644 0 2000-01-01 00:10:00 dir/i
+ n 644 0 2000-01-01 00:10:00 dir/j
+ n 644 0 2000-01-01 00:00:00 dir/nested/a
+ n 644 0 2000-01-01 00:10:00 dir/nested/h
+ r ?????????????????????????????????? dir/nested/m (glob)
+ a ?????????????????????????????????? dir/o (glob)
+ n 644 0 2000-01-01 00:00:00 dir2/e
+ n 644 0 2000-01-01 00:10:00 dir2/k
+ n 644 0 2000-01-01 00:10:00 dir2/l
+ n 644 0 2000-01-01 00:00:00 f
+ n 644 0 2000-01-01 00:10:00 g
+ $ hg debugstate > ../post-status
+ $ diff ../reference ../post-status
+
+Then we can start a status that:
+- has some update to do (the touch call)
+- will wait AFTER running status, but before updating the cache on disk
+
+ $ touch -t 200001010001 dir/c
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+We check it runs the status first by modifying a file and updating another timestamp
+
+ $ touch -t 200001010003 dir/i
+ $ echo babar > dir/j
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+The test process should have reported a status before the change we made,
+and should have missed the timestamp update
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+ $ hg debugstate | grep dir/c
+ n 644 0 2000-01-01 00:01:00 dir/c
+ $ hg debugstate | grep dir/i
+ n 644 0 2000-01-01 00:10:00 dir/i
+ $ hg debugstate | grep dir/j
+ n 644 0 2000-01-01 00:10:00 dir/j
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Actual Testing
+==============
+
+Race with a `hg add`
+-------------------
+
+ $ cp -a reference-repo race-with-add
+ $ cd race-with-add
+
+spin a `hg status` with some caches to update
+
+ $ touch -t 200001020001 f
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+Add a file
+
+ $ hg $d2args add dir/n
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+The file should in a "added" state
+
+ $ hg status
+ A dir/n
+ A dir/o
+ R dir/nested/m
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with a `hg commit`
+----------------------
+
+ $ cp -a reference-repo race-with-commit
+ $ cd race-with-commit
+
+spin a `hg status` with some caches to update
+
+ $ touch -t 200001020001 dir/j
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+Add a file and force the data file rewrite
+
+ $ hg $d2args commit -m created-during-status dir/o
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+The parent must change and the status should be clean
+
+ $ hg summary
+ parent: 2:2e3b442a2fd4 tip
+ created-during-status
+ branch: default
+ commit: 1 removed, 3 unknown
+ update: (current)
+ phases: 3 draft
+ $ hg status
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with a `hg update`
+----------------------
+
+ $ cp -a reference-repo race-with-update
+ $ cd race-with-update
+
+spin a `hg status` with some caches to update
+
+ $ touch -t 200001020001 dir2/k
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+Add a file and force the data file rewrite
+
+ $ hg $d2args update ".~1"
+ 0 files updated, 0 files merged, 6 files removed, 0 files unresolved
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+The parent must change and the status should be clean
+
+ $ hg summary
+ parent: 0:4f23db756b09
+ recreate a bunch of files to facilitate dirstate-v2 append
+ branch: default
+ commit: 1 added, 3 unknown (new branch head)
+ update: 1 new changesets (update)
+ phases: 2 draft
+ $ hg status
+ A dir/o
+ ? dir/n
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with another status
+------------------------
+
+ $ cp -a reference-repo race-with-status
+ $ cd race-with-status
+
+spin a `hg status` with some caches to update
+
+ $ touch -t 200001010030 dir/nested/h
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+touch g
+
+ $ touch -t 200001010025 g
+ $ hg $d2args status
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+the first update should be on disk
+
+ $ hg debugstate --all | grep "g"
+ n 644 0 2000-01-01 00:25:00 g
+
+The status process should return a consistent result and not crash.
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
+
+Race with the removal of an ambiguous file
+----------------------รจ-------------------
+
+ $ cp -a reference-repo race-with-remove
+ $ cd race-with-remove
+
+spin a `hg status` with some caches to update
+
+ $ touch -t 200001010035 dir2/l
+ $ hg st >$TESTTMP/status-race-lock.out 2>$TESTTMP/status-race-lock.log \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.sync.status.pre-dirstate-write-file=$TESTTMP/status-race-lock \
+ > &
+ $ $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/status-race-lock.waiting
+
+remove that same file
+
+ $ hg $d2args remove dir2/l
+ $ touch $TESTTMP/status-race-lock
+ $ wait
+
+file should be marked as removed
+
+ $ hg status
+ A dir/o
+ R dir/nested/m
+ R dir2/l
+ ? dir/n
+ ? p
+ ? q
+
+The status process should return a consistent result and not crash.
+
+ $ cat $TESTTMP/status-race-lock.out
+ A dir/o
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
+ $ cat $TESTTMP/status-race-lock.log
+
+final cleanup
+
+ $ rm $TESTTMP/status-race-lock $TESTTMP/status-race-lock.waiting
+ $ cd ..
--- a/tests/test-dirstate.t Thu Mar 02 04:16:47 2023 +0100
+++ b/tests/test-dirstate.t Thu Mar 02 19:02:52 2023 +0100
@@ -148,7 +148,7 @@
$ hg init append-mostly
$ cd append-mostly
$ mkdir dir dir2
- $ touch dir/a dir/b dir/c dir/d dir/e dir2/f
+ $ touch -t 200001010000 dir/a dir/b dir/c dir/d dir/e dir2/f dir dir2
$ hg commit -Aqm initial
$ hg st
$ dirstate_data_files | wc -l
@@ -163,12 +163,11 @@
$ dirstate_uuid_has_not_changed
not testing because using Python implementation (no-rust no-rhg !)
-Trigger an append with a small change
+Trigger an append with a small change to directory mtime
$ current_data_size=$(find_dirstate_data_size)
- $ rm dir2/f
+ $ touch -t 201001010000 dir2
$ hg st
- ! dir2/f
$ dirstate_data_files | wc -l
*1 (re)
$ dirstate_uuid_has_not_changed
@@ -187,7 +186,6 @@
Trigger a rust/rhg run which updates the unused bytes value
$ hg st
A file
- ! dir2/f
$ dirstate_data_files | wc -l
*1 (re)
$ dirstate_uuid_has_not_changed
@@ -213,8 +211,223 @@
#endif
+(non-Rust always rewrites)
+
+Test the devel option to control write behavior
+==============================================
+
+Sometimes, debugging or testing the dirstate requires making sure that we have
+done a complete rewrite of the data file and have no unreachable data around,
+sometimes it requires we ensure we don't.
+
+We test the option to force this rewrite by creating the situation where an
+append would happen and check that it doesn't happen.
+
+ $ cd ..
+ $ hg init force-base
+ $ cd force-base
+ $ mkdir -p dir/nested dir2
+ $ touch -t 200001010000 f dir/nested/a dir/b dir/c dir/d dir2/e dir/nested dir dir2
+ $ hg commit -Aqm "recreate a bunch of files to facilitate append"
+ $ hg st --config devel.dirstate.v2.data_update_mode=force-new
+ $ cd ..
+
+#if dirstate-v2
+ $ hg -R force-base debugstate --docket | grep unused
+ number of unused bytes: 0
+
+Check with the option in "auto" mode
+------------------------------------
+ $ cp -a force-base append-mostly-no-force-rewrite
+ $ cd append-mostly-no-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append
+
+ $ touch -t 202212010000 dir2
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=auto
+
+UUID hasn't changed and a non-zero number of unused bytes means we've appended
+
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation never appends at the time this is written.
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+#else
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: [1-9]\d* (re)
+#endif
+ $ cd ..
+
+Check the same scenario with the option set to "force-new"
+---------------------------------------------------------
+
+ $ cp -a force-base append-mostly-force-rewrite
+ $ cd append-mostly-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append, but we force the rewrite
+
+ $ touch -t 202212010000 dir2
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-new
+
+UUID has changed and zero unused bytes means a full-rewrite happened
+
+
+#if no-rust no-rhg
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation
+#else
+ $ dirstate_uuid_has_not_changed
+ [1]
+#endif
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0
+ $ cd ..
+
+
+Check the same scenario with the option set to "force-append"
+-------------------------------------------------------------
+
+(should behave the same as "auto" here)
+
+ $ cp -a force-base append-mostly-force-append
+ $ cd append-mostly-force-append
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append, which we are forcing here anyway.
+
+ $ touch -t 202212010000 dir2
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-append
+
+UUID has not changed and some unused bytes exist in the data file
+
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation never appends at the time this is written.
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+#else
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: [1-9]\d* (re)
+#endif
+ $ cd ..
+
+Check with the option in "auto" mode
+------------------------------------
+ $ cp -a force-base append-mostly-no-force-rewrite
+ $ cd append-mostly-no-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+ $ touch -t 202212010005 `hg files`
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=auto
+
+UUID has changed and zero unused bytes means we've rewritten.
+
+#if no-rust no-rhg
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation
+#else
+ $ dirstate_uuid_has_not_changed
+ [1]
+#endif
+
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+ $ cd ..
+
+Check the same scenario with the option set to "force-new"
+---------------------------------------------------------
+
+(should be the same as auto)
+
+ $ cp -a force-base append-mostly-force-rewrite
+ $ cd append-mostly-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+ $ touch -t 202212010005 `hg files`
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-new
+
+UUID has changed and a zero number unused bytes means we've rewritten.
+
+
+#if no-rust no-rhg
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation
+#else
+ $ dirstate_uuid_has_not_changed
+ [1]
+#endif
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0
+ $ cd ..
+
+
+Check the same scenario with the option set to "force-append"
+-------------------------------------------------------------
+
+Should append even if "auto" did not
+
+ $ cp -a force-base append-mostly-force-append
+ $ cd append-mostly-force-append
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+ $ touch -t 202212010005 `hg files`
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-append
+
+UUID has not changed and some unused bytes exist in the data file
+
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation is never appending at the time this is written.
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+#else
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: [1-9]\d* (re)
+#endif
+ $ cd ..
+
+
+
+Get back into a state suitable for the test of the file.
+
+ $ cd ./append-mostly
+
+#else
+ $ cd ./u
+#endif
+
Transaction compatibility
--------------------------
+=========================
The transaction preserves the dirstate.
We should make sure all of it (docket + data) is preserved
--- a/tests/test-patchbomb.t Thu Mar 02 04:16:47 2023 +0100
+++ b/tests/test-patchbomb.t Thu Mar 02 19:02:52 2023 +0100
@@ -183,6 +183,43 @@
+a
+Test --git is respected
+ $ hg email --git --date '1970-1-1 0:1' -n -f quux -t foo -c bar -r tip
+ this patch series consists of 1 patches.
+
+
+ displaying [PATCH] a ...
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: [PATCH] a
+ X-Mercurial-Node: 8580ff50825a50c8f716709acdf8de0deddcd6ab
+ X-Mercurial-Series-Index: 1
+ X-Mercurial-Series-Total: 1
+ Message-Id: <8580ff50825a50c8f716.60@test-hostname>
+ X-Mercurial-Series-Id: <8580ff50825a50c8f716.60@test-hostname>
+ User-Agent: Mercurial-patchbomb/* (glob)
+ Date: Thu, 01 Jan 1970 00:01:00 +0000
+ From: quux
+ To: foo
+ Cc: bar
+
+ # HG changeset patch
+ # User test
+ # Date 1 0
+ # Thu Jan 01 00:00:01 1970 +0000
+ # Node ID 8580ff50825a50c8f716709acdf8de0deddcd6ab
+ # Parent 0000000000000000000000000000000000000000
+ a
+
+ diff --git a/a b/a
+ new file mode 100644
+ --- /dev/null
+ +++ b/a
+ @@ -0,0 +1,1 @@
+ +a
+
+
Test breaking format changes aren't
$ hg --config diff.noprefix=True email --date '1970-1-1 0:1' -n -f quux -t foo -c bar -r tip