dirstate-v2: add devel config option to control write behavior
This will help us to write predictable tests checking behavior in each case.
--- a/mercurial/configitems.py Fri Feb 24 18:21:54 2023 +0100
+++ b/mercurial/configitems.py Tue Feb 28 15:49:53 2023 +0100
@@ -640,6 +640,15 @@
b'deprec-warn',
default=False,
)
+# possible values:
+# - auto (the default)
+# - force-append
+# - force-new
+coreconfigitem(
+ b'devel',
+ b'dirstate.v2.data_update_mode',
+ default="auto",
+)
coreconfigitem(
b'devel',
b'disableloaddefaultcerts',
--- a/mercurial/dirstatemap.py Fri Feb 24 18:21:54 2023 +0100
+++ b/mercurial/dirstatemap.py Tue Feb 28 15:49:53 2023 +0100
@@ -33,6 +33,7 @@
WRITE_MODE_AUTO = 0
WRITE_MODE_FORCE_NEW = 1
+WRITE_MODE_FORCE_APPEND = 2
class _dirstatemapcommon:
@@ -57,6 +58,16 @@
self._parents = None
self._dirtyparents = False
self._docket = None
+ write_mode = ui.config(b"devel", b"dirstate.v2.data_update_mode")
+ if write_mode == b"auto":
+ self._write_mode = WRITE_MODE_AUTO
+ elif write_mode == b"force-append":
+ self._write_mode = WRITE_MODE_FORCE_APPEND
+ elif write_mode == b"force-new":
+ self._write_mode = WRITE_MODE_FORCE_NEW
+ else:
+ # unknown value, fallback to default
+ self._write_mode = WRITE_MODE_AUTO
# for consistent view between _pl() and _read() invocations
self._pendingmode = None
@@ -612,7 +623,7 @@
return
# We can only append to an existing data file if there is one
- write_mode = WRITE_MODE_AUTO
+ write_mode = self._write_mode
if self.docket.uuid is None:
write_mode = WRITE_MODE_FORCE_NEW
packed, meta, append = self._map.write_v2(write_mode)
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Feb 28 15:49:53 2023 +0100
@@ -42,6 +42,7 @@
pub enum DirstateMapWriteMode {
Auto,
ForceNewDataFile,
+ ForceAppend,
}
#[derive(Debug)]
@@ -69,6 +70,9 @@
pub(super) old_data_size: usize,
pub(super) dirstate_version: DirstateVersion,
+
+ /// Controlled by config option `devel.dirstate.v2.data_update_mode`
+ pub(super) write_mode: DirstateMapWriteMode,
}
/// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -457,6 +461,7 @@
unreachable_bytes: 0,
old_data_size: 0,
dirstate_version: DirstateVersion::V1,
+ write_mode: DirstateMapWriteMode::Auto,
}
}
@@ -525,8 +530,15 @@
/// append to the existing data file that contains `self.on_disk` (true),
/// or create a new data file from scratch (false).
pub(super) fn write_should_append(&self) -> bool {
- let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
- ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
+ match self.write_mode {
+ DirstateMapWriteMode::ForceAppend => true,
+ DirstateMapWriteMode::ForceNewDataFile => false,
+ DirstateMapWriteMode::Auto => {
+ let ratio =
+ self.unreachable_bytes as f32 / self.on_disk.len() as f32;
+ ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
+ }
+ }
}
fn get_node<'tree>(
@@ -923,6 +935,10 @@
*unreachable_bytes += path.len() as u32
}
}
+
+ pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
+ self.write_mode = write_mode;
+ }
}
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Feb 28 15:49:53 2023 +0100
@@ -313,6 +313,7 @@
unreachable_bytes: meta.unreachable_bytes.get(),
old_data_size: on_disk.len(),
dirstate_version: DirstateVersion::V2,
+ write_mode: DirstateMapWriteMode::Auto,
};
Ok(dirstate_map)
}
@@ -641,6 +642,7 @@
let append = match write_mode {
DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
DirstateMapWriteMode::ForceNewDataFile => false,
+ DirstateMapWriteMode::ForceAppend => true,
};
if append {
log::trace!("appending to the dirstate data file");
--- a/rust/hg-core/src/repo.rs Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-core/src/repo.rs Tue Feb 28 15:49:53 2023 +0100
@@ -321,22 +321,39 @@
.set(Some(docket.uuid.to_owned()));
let data_size = docket.data_size();
let metadata = docket.tree_metadata();
- if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
- // Don't mmap on NFS to prevent `SIGBUS` error on deletion
- OwningDirstateMap::new_v2(
- self.hg_vfs().read(docket.data_filename())?,
- data_size,
- metadata,
- )
- } else if let Some(data_mmap) = self
- .hg_vfs()
- .mmap_open(docket.data_filename())
- .io_not_found_as_none()?
- {
- OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
- } else {
- OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
- }
+ let mut map =
+ if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
+ // Don't mmap on NFS to prevent `SIGBUS` error on deletion
+ OwningDirstateMap::new_v2(
+ self.hg_vfs().read(docket.data_filename())?,
+ data_size,
+ metadata,
+ )
+ } else if let Some(data_mmap) = self
+ .hg_vfs()
+ .mmap_open(docket.data_filename())
+ .io_not_found_as_none()?
+ {
+ OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
+ } else {
+ OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
+ }?;
+
+ let write_mode_config = self
+ .config()
+ .get_str(b"devel", b"dirstate.v2.data_update_mode")
+ .unwrap_or(Some("auto"))
+ .unwrap_or("auto"); // don't bother for devel options
+ let write_mode = match write_mode_config {
+ "auto" => DirstateMapWriteMode::Auto,
+ "force-new" => DirstateMapWriteMode::ForceNewDataFile,
+ "force-append" => DirstateMapWriteMode::ForceAppend,
+ _ => DirstateMapWriteMode::Auto,
+ };
+
+ map.with_dmap_mut(|m| m.set_write_mode(write_mode));
+
+ Ok(map)
} else {
let (map, parents) =
OwningDirstateMap::new_v1(dirstate_file_contents)?;
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Tue Feb 28 15:49:53 2023 +0100
@@ -254,6 +254,7 @@
let rust_write_mode = match write_mode {
0 => DirstateMapWriteMode::Auto,
1 => DirstateMapWriteMode::ForceNewDataFile,
+ 2 => DirstateMapWriteMode::ForceAppend,
_ => DirstateMapWriteMode::Auto, // XXX should we error out?
};
let result = inner.pack_v2(rust_write_mode);
--- a/tests/test-dirstate-status-write-race.t Fri Feb 24 18:21:54 2023 +0100
+++ b/tests/test-dirstate-status-write-race.t Tue Feb 28 15:49:53 2023 +0100
@@ -93,7 +93,7 @@
$ hg add dir/o
$ hg remove dir/nested/m
- $ hg st
+ $ hg st --config devel.dirstate.v2.data_update_mode=force-new
A dir/o
R dir/nested/m
? dir/n
--- a/tests/test-dirstate.t Fri Feb 24 18:21:54 2023 +0100
+++ b/tests/test-dirstate.t Tue Feb 28 15:49:53 2023 +0100
@@ -211,8 +211,223 @@
#endif
+(non-Rust always rewrites)
+
+Test the devel option to control write behavior
+==============================================
+
+Sometimes, debugging or testing the dirstate requires making sure that we have
+done a complete rewrite of the data file and have no unreachable data around,
+sometimes it requires we ensure we don't.
+
+We test the option to force this rewrite by creating the situation where an
+append would happen and check that it doesn't happen.
+
+ $ cd ..
+ $ hg init force-base
+ $ cd force-base
+ $ mkdir -p dir/nested dir2
+ $ touch -t 200001010000 f dir/nested/a dir/b dir/c dir/d dir2/e dir/nested dir dir2
+ $ hg commit -Aqm "recreate a bunch of files to facilitate append"
+ $ hg st --config devel.dirstate.v2.data_update_mode=force-new
+ $ cd ..
+
+#if dirstate-v2
+ $ hg -R force-base debugstate --docket | grep unused
+ number of unused bytes: 0
+
+Check with the option in "auto" mode
+------------------------------------
+ $ cp -a force-base append-mostly-no-force-rewrite
+ $ cd append-mostly-no-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append
+
+ $ touch -t 202212010000 dir2
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=auto
+
+UUID hasn't changed and a non-zero number of unused bytes means we've appended
+
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation never appends at the time this is written.
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+#else
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: [1-9]\d* (re)
+#endif
+ $ cd ..
+
+Check the same scenario with the option set to "force-new"
+---------------------------------------------------------
+
+ $ cp -a force-base append-mostly-force-rewrite
+ $ cd append-mostly-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append, but we force the rewrite
+
+ $ touch -t 202212010000 dir2
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-new
+
+UUID has changed and zero unused bytes means a full-rewrite happened
+
+
+#if no-rust no-rhg
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation
+#else
+ $ dirstate_uuid_has_not_changed
+ [1]
+#endif
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0
+ $ cd ..
+
+
+Check the same scenario with the option set to "force-append"
+-------------------------------------------------------------
+
+(should behave the same as "auto" here)
+
+ $ cp -a force-base append-mostly-force-append
+ $ cd append-mostly-force-append
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append, which we are forcing here anyway.
+
+ $ touch -t 202212010000 dir2
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-append
+
+UUID has not changed and some unused bytes exist in the data file
+
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation never appends at the time this is written.
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+#else
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: [1-9]\d* (re)
+#endif
+ $ cd ..
+
+Check with the option in "auto" mode
+------------------------------------
+ $ cp -a force-base append-mostly-no-force-rewrite
+ $ cd append-mostly-no-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+ $ touch -t 202212010005 `hg files`
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=auto
+
+UUID has changed and zero unused bytes means we've rewritten.
+
+#if no-rust no-rhg
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation
+#else
+ $ dirstate_uuid_has_not_changed
+ [1]
+#endif
+
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+ $ cd ..
+
+Check the same scenario with the option set to "force-new"
+---------------------------------------------------------
+
+(should be the same as auto)
+
+ $ cp -a force-base append-mostly-force-rewrite
+ $ cd append-mostly-force-rewrite
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+ $ touch -t 202212010005 `hg files`
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-new
+
+UUID has changed and a zero number unused bytes means we've rewritten.
+
+
+#if no-rust no-rhg
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation
+#else
+ $ dirstate_uuid_has_not_changed
+ [1]
+#endif
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0
+ $ cd ..
+
+
+Check the same scenario with the option set to "force-append"
+-------------------------------------------------------------
+
+Should append even if "auto" did not
+
+ $ cp -a force-base append-mostly-force-append
+ $ cd append-mostly-force-append
+ $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+ $ touch -t 202212010005 `hg files`
+ $ hg st \
+ > --config rhg.on-unsupported=abort \
+ > --config devel.dirstate.v2.data_update_mode=force-append
+
+UUID has not changed and some unused bytes exist in the data file
+
+ $ dirstate_uuid_has_not_changed
+ not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation is never appending at the time this is written.
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: 0 (known-bad-output !)
+#else
+ $ hg debugstate --docket | grep unused
+ number of unused bytes: [1-9]\d* (re)
+#endif
+ $ cd ..
+
+
+
+Get back into a state suitable for the test of the file.
+
+ $ cd ./append-mostly
+
+#else
+ $ cd ./u
+#endif
+
Transaction compatibility
--------------------------
+=========================
The transaction preserves the dirstate.
We should make sure all of it (docket + data) is preserved