update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap handling, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
--- a/hgext/eol.py Wed Oct 16 19:14:30 2024 +0200
+++ b/hgext/eol.py Tue Oct 01 13:49:11 2024 +0200
@@ -101,6 +101,7 @@
error as errormod,
extensions,
match,
+ merge,
pycompat,
registrar,
scmutil,
@@ -376,6 +377,7 @@
def reposetup(ui, repo):
uisetup(repo.ui)
+ merge.MAYBE_USE_RUST_UPDATE = False
if not repo.local():
return
--- a/hgext/journal.py Wed Oct 16 19:14:30 2024 +0200
+++ b/hgext/journal.py Tue Oct 01 13:49:11 2024 +0200
@@ -34,6 +34,7 @@
localrepo,
lock,
logcmdutil,
+ merge,
pycompat,
registrar,
util,
@@ -68,6 +69,7 @@
# Journal recording, register hooks and storage object
def extsetup(ui):
+ merge.MAYBE_USE_RUST_UPDATE = False
extensions.wrapfunction(dispatch, 'runcommand', runcommand)
extensions.wrapfunction(bookmarks.bmstore, '_write', recordbookmarks)
extensions.wrapfilecache(
--- a/hgext/keyword.py Wed Oct 16 19:14:30 2024 +0200
+++ b/hgext/keyword.py Tue Oct 01 13:49:11 2024 +0200
@@ -102,6 +102,7 @@
localrepo,
logcmdutil,
match,
+ merge,
patch,
pathutil,
pycompat,
@@ -818,6 +819,7 @@
extensions.wrapfunction(cmdutil, 'dorecord', kw_dorecord)
for c in nokwwebcommands.split():
extensions.wrapfunction(webcommands, c, kwweb_skip)
+ merge.MAYBE_USE_RUST_UPDATE = False
def reposetup(ui, repo):
--- a/hgext/largefiles/__init__.py Wed Oct 16 19:14:30 2024 +0200
+++ b/hgext/largefiles/__init__.py Tue Oct 01 13:49:11 2024 +0200
@@ -114,6 +114,7 @@
exthelper,
hg,
localrepo,
+ merge,
wireprotov1server,
)
@@ -165,6 +166,7 @@
@eh.uisetup
def _uisetup(ui):
+ merge.MAYBE_USE_RUST_UPDATE = False
localrepo.featuresetupfuncs.add(featuresetup)
hg.wirepeersetupfuncs.append(proto.wirereposetup)
--- a/hgext/lfs/__init__.py Wed Oct 16 19:14:30 2024 +0200
+++ b/hgext/lfs/__init__.py Tue Oct 01 13:49:11 2024 +0200
@@ -138,6 +138,7 @@
filesetlang,
localrepo,
logcmdutil,
+ merge,
minifileset,
pycompat,
revlog,
@@ -234,6 +235,7 @@
@eh.uisetup
def _uisetup(ui):
+ merge.MAYBE_USE_RUST_UPDATE = False
localrepo.featuresetupfuncs.add(featuresetup)
--- a/mercurial/merge.py Wed Oct 16 19:14:30 2024 +0200
+++ b/mercurial/merge.py Tue Oct 01 13:49:11 2024 +0200
@@ -8,6 +8,7 @@
from __future__ import annotations
import collections
+import os
import struct
import typing
from typing import Dict, Optional, Tuple
@@ -34,12 +35,15 @@
pathutil,
policy,
pycompat,
+ requirements,
scmutil,
subrepoutil,
util,
worker,
)
+rust_update_mod = policy.importrust("update")
+
_pack = struct.pack
_unpack = struct.unpack
@@ -147,6 +151,8 @@
dircache = dict()
dirstate = repo.dirstate
wvfs = repo.wvfs
+ # wouldn't it be easier to loop over unknown files (and dirs)?
+
if not force:
def collectconflicts(conflicts, config):
@@ -1835,6 +1841,12 @@
UPDATECHECK_LINEAR = b'linear'
UPDATECHECK_NO_CONFLICT = b'noconflict'
+# Let extensions turn off any Rust code in the update code if that interferes
+# with their patching.
+# This being `True` does not mean that you have Rust extensions installed or
+# that the Rust path will be taken for any given invocation.
+MAYBE_USE_RUST_UPDATE = True
+
def _update(
repo,
@@ -2008,6 +2020,60 @@
if not branchmerge and not wc.dirty(missing=True):
followcopies = False
+ update_from_null = False
+ update_from_null_fallback = False
+ if (
+ MAYBE_USE_RUST_UPDATE
+ and rust_update_mod is not None
+ and p1.rev() == nullrev
+ and not branchmerge
+ # TODO it's probably not too hard to pass down the transaction and
+ # respect the write patterns from Rust. But since it doesn't affect
+ # a simple update from null, then it doesn't matter yet.
+ and repo.currenttransaction() is None
+ and matcher is None
+ and not wc.mergestate().active()
+ and b'.hgsubstate' not in p2
+ ):
+ working_dir_iter = os.scandir(repo.root)
+ maybe_hg_folder = next(working_dir_iter)
+ assert maybe_hg_folder is not None
+ if maybe_hg_folder.name == b".hg":
+ try:
+ next(working_dir_iter)
+ except StopIteration:
+ update_from_null = True
+
+ if update_from_null:
+ # Check the narrowspec and sparsespec here to display warnings
+ # more easily.
+ # TODO figure out a way of bubbling up warnings to Python
+ # while not polluting the Rust code (probably a channel)
+ repo.narrowmatch()
+ sparse.matcher(repo, [nullrev, p2.rev()])
+ repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
+ # note that we're in the middle of an update
+ repo.vfs.write(b'updatestate', p2.hex())
+ try:
+ updated_count = rust_update_mod.update_from_null(
+ repo.root, p2.rev()
+ )
+ except rust_update_mod.FallbackError:
+ update_from_null_fallback = True
+ else:
+ # We've changed the dirstate from Rust, we need to tell Python
+ repo.dirstate.invalidate()
+ # This includes setting the parents, since they are not read
+ # again on invalidation
+ with repo.dirstate.changing_parents(repo):
+ repo.dirstate.setparents(fp2)
+ repo.dirstate.setbranch(p2.branch(), repo.currenttransaction())
+ sparse.prunetemporaryincludes(repo)
+ repo.hook(b'update', parent1=xp1, parent2=xp2, error=0)
+ # update completed, clear state
+ util.unlink(repo.vfs.join(b'updatestate'))
+ return updateresult(updated_count, 0, 0, 0)
+
### calculate phase
mresult = calculateupdates(
repo,
@@ -2131,11 +2197,13 @@
# the dirstate.
always = matcher is None or matcher.always()
updatedirstate = updatedirstate and always and not wc.isinmemory()
- if updatedirstate:
+ # If we're in the fallback case, we've already done this
+ if updatedirstate and not update_from_null_fallback:
repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
# note that we're in the middle of an update
repo.vfs.write(b'updatestate', p2.hex())
+ # TODO don't run if Rust is available
_advertisefsmonitor(
repo, mresult.len((mergestatemod.ACTION_GET,)), p1.node()
)
@@ -2172,14 +2240,14 @@
mergestatemod.recordupdates(
repo, mresult.actionsdict, branchmerge, getfiledata
)
- # update completed, clear state
- util.unlink(repo.vfs.join(b'updatestate'))
-
if not branchmerge:
repo.dirstate.setbranch(
p2.branch(), repo.currenttransaction()
)
+ # update completed, clear state
+ util.unlink(repo.vfs.join(b'updatestate'))
+
# If we're updating to a location, clean up any stale temporary includes
# (ex: this happens during hg rebase --abort).
if not branchmerge:
--- a/rust/Cargo.lock Wed Oct 16 19:14:30 2024 +0200
+++ b/rust/Cargo.lock Tue Oct 01 13:49:11 2024 +0200
@@ -650,6 +650,7 @@
"thread_local",
"toml",
"twox-hash",
+ "uuid",
"zstd",
]
@@ -1530,6 +1531,15 @@
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
+name = "uuid"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
+dependencies = [
+ "getrandom 0.2.8",
+]
+
+[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
--- a/rust/hg-core/Cargo.toml Wed Oct 16 19:14:30 2024 +0200
+++ b/rust/hg-core/Cargo.toml Tue Oct 01 13:49:11 2024 +0200
@@ -44,6 +44,7 @@
chrono = "0.4.34"
dyn-clone = "1.0.16"
filetime = "0.2.23"
+uuid = { version = "1.10", features = ["v4"] }
# We don't use the `miniz-oxide` backend to not change rhg benchmarks and until
# we have a clearer view of which backend is the fastest.
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Wed Oct 16 19:14:30 2024 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Oct 01 13:49:11 2024 +0200
@@ -8,7 +8,9 @@
self, DirstateMap, DirstateMapWriteMode, NodeRef,
};
use crate::dirstate_tree::path_with_basename::WithBasename;
-use crate::errors::HgError;
+use crate::errors::{HgError, IoResultExt};
+use crate::repo::Repo;
+use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
@@ -20,6 +22,7 @@
use rand::Rng;
use std::borrow::Cow;
use std::fmt::Write;
+use uuid::Uuid;
use super::dirstate_map::DirstateIdentity;
@@ -916,3 +919,22 @@
}
}
}
+
+/// Write a new tracked key to disk.
+/// See `format.use-dirstate-tracked-hint` config help for more details.
+pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> {
+ // TODO move this to the dirstate itself once it grows a `dirty` flag and
+ // can reason about which context it needs to write this in.
+ // For now, only this fast-path needs to think about the tracked hint.
+ // Use [`crate::dirstate_tree::dirstate_map::DirstateMap::
+ // use_tracked_hint`] instead of looking at the requirements once
+ // refactored.
+ if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) {
+ return Ok(());
+ }
+ // TODO use `hg_vfs` once the `InnerRevlog` is in.
+ let path = repo
+ .working_directory_path()
+ .join(".hg/dirstate-tracked-hint");
+ std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path)
+}
--- a/rust/hg-core/src/lib.rs Wed Oct 16 19:14:30 2024 +0200
+++ b/rust/hg-core/src/lib.rs Tue Oct 01 13:49:11 2024 +0200
@@ -37,6 +37,7 @@
pub mod operations;
pub mod progress;
pub mod revset;
+pub mod update;
pub mod utils;
pub mod vfs;
--- a/rust/hg-core/src/revlog/mod.rs Wed Oct 16 19:14:30 2024 +0200
+++ b/rust/hg-core/src/revlog/mod.rs Tue Oct 01 13:49:11 2024 +0200
@@ -1180,7 +1180,7 @@
} else {
if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
return Err(HgError::unsupported(
- "ellipsis revisions are not supported by rhg",
+ "support for ellipsis nodes is missing",
)
.into());
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/update.rs Tue Oct 01 13:49:11 2024 +0200
@@ -0,0 +1,491 @@
+//! Tools for moving the repository to a given revision
+
+use std::{
+ fs::Permissions,
+ io::Write,
+ os::unix::fs::{MetadataExt, PermissionsExt},
+ path::Path,
+ time::Duration,
+};
+
+use crate::{
+ dirstate::{ParentFileData, TruncatedTimestamp},
+ dirstate_tree::{
+ dirstate_map::DirstateEntryReset, on_disk::write_tracked_key,
+ },
+ errors::{HgError, IoResultExt},
+ exit_codes,
+ filelog::Filelog,
+ narrow,
+ node::NULL_NODE,
+ operations::{list_rev_tracked_files, ExpandedManifestEntry},
+ progress::Progress,
+ repo::Repo,
+ sparse,
+ utils::{
+ files::{filesystem_now, get_path_from_bytes},
+ hg_path::{hg_path_to_path_buf, HgPath, HgPathError},
+ path_auditor::PathAuditor,
+ },
+ vfs::{is_on_nfs_mount, VfsImpl},
+ DirstateParents, RevlogError, RevlogOpenOptions, UncheckedRevision,
+};
+use crossbeam_channel::{Receiver, Sender};
+use rayon::prelude::*;
+
+fn write_dirstate(repo: &Repo) -> Result<(), HgError> {
+ repo.write_dirstate()
+ .map_err(|e| HgError::abort(e.to_string(), exit_codes::ABORT, None))?;
+ write_tracked_key(repo)
+}
+
+/// Update the current working copy of `repo` to the given revision `to`, from
+/// the null revision and set + write out the dirstate to reflect that.
+///
+/// Do not call this outside of a Python context. This does *not* handle any
+/// of the checks, hooks, lock taking needed to setup and get out of this
+/// update from the null revision.
+pub fn update_from_null(
+ repo: &Repo,
+ to: UncheckedRevision,
+ progress: &dyn Progress,
+) -> Result<usize, HgError> {
+ // Ignore the warnings, they've been displayed by Python already
+ // TODO non-Python clients: display narrow warnings
+ let (narrow_matcher, _) = narrow::matcher(repo)?;
+
+ let files_for_rev = list_rev_tracked_files(repo, to, narrow_matcher)
+ .map_err(handle_revlog_error)?;
+ repo.manually_set_parents(DirstateParents {
+ p1: repo.node(to).expect("update target should exist"),
+ p2: NULL_NODE,
+ })?;
+
+ // Filter the working copy according to the sparse spec
+ let tracked_files: Result<Vec<_>, _> = if !repo.has_sparse() {
+ files_for_rev.iter().collect()
+ } else {
+ // Ignore the warnings, they've been displayed by Python already
+ // TODO non-Python clients: display sparse warnings
+ let (sparse_matcher, _) = sparse::matcher(repo)?;
+ files_for_rev
+ .iter()
+ .filter(|f| {
+ match f {
+ Ok(f) => sparse_matcher.matches(f.0),
+ Err(_) => true, // Errors stop the update, include them
+ }
+ })
+ .collect()
+ };
+ let tracked_files = tracked_files?;
+
+ if tracked_files.is_empty() {
+ // Still write the dirstate because we might not be in the null
+ // revision.
+ // This can happen in narrow repos where all paths are excluded in
+ // this revision.
+ write_dirstate(repo)?;
+ return Ok(0);
+ }
+ let store_vfs = &repo.store_vfs();
+ let options = repo.default_revlog_options(crate::RevlogType::Filelog)?;
+ let (errors_sender, errors_receiver) = crossbeam_channel::unbounded();
+ let (files_sender, files_receiver) = crossbeam_channel::unbounded();
+ let working_directory_path = &repo.working_directory_path();
+
+ let files_count = tracked_files.len();
+ let chunks = chunk_tracked_files(tracked_files);
+ progress.update(0, Some(files_count as u64));
+
+ create_working_copy(
+ chunks,
+ working_directory_path,
+ store_vfs,
+ options,
+ files_sender,
+ errors_sender,
+ progress,
+ );
+
+ let errors: Vec<HgError> = errors_receiver.iter().collect();
+ if !errors.is_empty() {
+ log::debug!("{} errors during update (see trace logs)", errors.len());
+ for error in errors.iter() {
+ log::trace!("{}", error);
+ }
+ // Best we can do is raise the first error (in order of the channel)
+ return Err(errors.into_iter().next().expect("can never be empty"));
+ }
+
+ // TODO try to run this concurrently to update the dirstate while we're
+ // still writing out the working copy to see if that improves performance.
+ let total = update_dirstate(repo, files_receiver)?;
+
+ write_dirstate(repo)?;
+
+ Ok(total)
+}
+
+fn handle_revlog_error(e: RevlogError) -> HgError {
+ match e {
+ crate::RevlogError::Other(hg_error) => hg_error,
+ e => HgError::abort(
+ format!("revlog error: {}", e),
+ exit_codes::ABORT,
+ None,
+ ),
+ }
+}
+
+/// Preallocated size of Vec holding directory contents. This aims at
+/// preventing the need for re-allocating the Vec in most cases.
+///
+/// The value is arbitrary: a little over the average number of files per
+/// directory observed in a few larger open-source repos.
+/// Most of the runtime is IO anyway, so this doesn't matter too much.
+const FILES_PER_DIRECTORY: usize = 16;
+
+/// Chunk files per directory prefix, so almost every directory is handled
+/// in a separate thread, which works around the FS inode mutex.
+/// Chunking less (and doing approximately `files_count`/`threads`) actually
+/// ends up being less performant: my hypothesis is `rayon`'s work stealing
+/// being more efficient with tasks of varying lengths.
+#[logging_timer::time("trace")]
+fn chunk_tracked_files(
+ tracked_files: Vec<ExpandedManifestEntry>,
+) -> Vec<(&HgPath, Vec<ExpandedManifestEntry>)> {
+ let files_count = tracked_files.len();
+
+ let mut chunks = Vec::with_capacity(files_count / FILES_PER_DIRECTORY);
+
+ let mut current_chunk = Vec::with_capacity(FILES_PER_DIRECTORY);
+ let mut last_directory = tracked_files[0].0.parent();
+
+ for file_info in tracked_files {
+ let current_directory = file_info.0.parent();
+ let different_directory = current_directory != last_directory;
+ if different_directory {
+ chunks.push((last_directory, current_chunk));
+ current_chunk = Vec::with_capacity(FILES_PER_DIRECTORY);
+ }
+ current_chunk.push(file_info);
+ last_directory = current_directory;
+ }
+ chunks.push((last_directory, current_chunk));
+ chunks
+}
+
+#[logging_timer::time("trace")]
+fn create_working_copy<'a: 'b, 'b>(
+ chunks: Vec<(&HgPath, Vec<ExpandedManifestEntry<'a>>)>,
+ working_directory_path: &Path,
+ store_vfs: &VfsImpl,
+ options: RevlogOpenOptions,
+ files_sender: Sender<(&'b HgPath, u32, usize, TruncatedTimestamp)>,
+ error_sender: Sender<HgError>,
+ progress: &dyn Progress,
+) {
+ let auditor = PathAuditor::new(working_directory_path);
+ chunks.into_par_iter().for_each(|(dir_path, chunk)| {
+ if let Err(e) = working_copy_worker(
+ dir_path,
+ chunk,
+ working_directory_path,
+ store_vfs,
+ options,
+ &files_sender,
+ progress,
+ &auditor,
+ ) {
+ error_sender
+ .send(e)
+ .expect("channel should not be disconnected")
+ }
+ });
+}
+
+/// Represents a work unit for a single thread, responsible for this set of
+/// files and restoring them to the working copy.
+#[allow(clippy::too_many_arguments)]
+fn working_copy_worker<'a: 'b, 'b>(
+ dir_path: &HgPath,
+ chunk: Vec<ExpandedManifestEntry<'a>>,
+ working_directory_path: &Path,
+ store_vfs: &VfsImpl,
+ options: RevlogOpenOptions,
+ files_sender: &Sender<(&'b HgPath, u32, usize, TruncatedTimestamp)>,
+ progress: &dyn Progress,
+ auditor: &PathAuditor,
+) -> Result<(), HgError> {
+ let dir_path =
+ hg_path_to_path_buf(dir_path).expect("invalid path in manifest");
+ let dir_path = working_directory_path.join(dir_path);
+ std::fs::create_dir_all(&dir_path).when_writing_file(&dir_path)?;
+
+ for (file, file_node, flags) in chunk {
+ auditor.audit_path(file)?;
+ let flags = flags.map(|f| f.into());
+ let path =
+ working_directory_path.join(get_path_from_bytes(file.as_bytes()));
+
+ // Treemanifest is not supported
+ assert!(flags != Some(b't'));
+
+ let filelog = Filelog::open_vfs(store_vfs, file, options)?;
+ let filelog_revision_data = &filelog
+ .data_for_node(file_node)
+ .map_err(handle_revlog_error)?;
+ let file_data = filelog_revision_data.file_data()?;
+
+ if flags == Some(b'l') {
+ let target = get_path_from_bytes(file_data);
+ if let Err(e) = std::os::unix::fs::symlink(target, &path) {
+ // If the path already exists either:
+ // - another process created this file while ignoring the
+ // lock => error
+ // - our check for the fast path is incorrect => error
+ // - this is a malicious repo/bundle and this is a symlink that
+ // tries to write things where it shouldn't be able to.
+ match e.kind() {
+ std::io::ErrorKind::AlreadyExists => {
+ let metadata = std::fs::symlink_metadata(&path)
+ .when_reading_file(&path)?;
+ if metadata.is_dir() {
+ return Err(HgError::Path(
+ HgPathError::TraversesSymbolicLink {
+ // Technically it should be one of the
+ // children, but good enough
+ path: file
+ .join(HgPath::new(b"*"))
+ .to_owned(),
+ symlink: file.to_owned(),
+ },
+ ));
+ }
+ return Err(e).when_writing_file(&path);
+ }
+ _ => return Err(e).when_writing_file(&path),
+ }
+ }
+ } else {
+ let mut f =
+ std::fs::File::create(&path).when_writing_file(&path)?;
+ f.write_all(file_data).when_writing_file(&path)?;
+ }
+ if flags == Some(b'x') {
+ std::fs::set_permissions(&path, Permissions::from_mode(0o755))
+ .when_writing_file(&path)?;
+ }
+ let metadata =
+ std::fs::symlink_metadata(&path).when_reading_file(&path)?;
+
+ let mode = metadata.mode();
+
+ files_sender
+ .send((
+ file,
+ mode,
+ file_data.len(),
+ TruncatedTimestamp::for_mtime_of(&metadata)
+ .when_reading_file(&path)?,
+ ))
+ .expect("channel should not be closed");
+ progress.increment(1, None);
+ }
+ Ok(())
+}
+
+#[logging_timer::time("trace")]
+fn update_dirstate(
+ repo: &Repo,
+ files_receiver: Receiver<(&HgPath, u32, usize, TruncatedTimestamp)>,
+) -> Result<usize, HgError> {
+ let mut dirstate = repo
+ .dirstate_map_mut()
+ .map_err(|e| HgError::abort(e.to_string(), exit_codes::ABORT, None))?;
+
+ // (see the comments in `filter_ambiguous_files` in `merge.py` for more)
+ // It turns out that (on Linux at least) the filesystem resolution time
+ // for most filesystems is based on the HZ kernel config. Their internal
+ // clocks do return nanoseconds if the hardware clock is precise enough,
+ // which should be the case on most recent computers but are only updated
+ // every few milliseconds at best (every "jiffy").
+ //
+ // We are still not concerned with fixing the race with other
+ // processes that might modify the working copy right after it was created
+ // within the same tick, because it is impossible to catch.
+ // However, we might as well not race with operations that could run right
+ // after this one, especially other Mercurial operations that could be
+ // waiting for the wlock to change file contents and the dirstate.
+ //
+ // Thus: wait until the filesystem clock has ticked to filter ambiguous
+ // entries and write the dirstate, but only for dirstate-v2, since v1 only
+ // has second-level granularity and waiting for a whole second is too much
+ // of a penalty in the general case.
+ // Although we're assuming that people running dirstate-v2 on Linux
+ // don't have a second-granularity FS (with the exclusion of NFS), users
+ // can be surprising, and at some point in the future dirstate-v2 will
+ // become the default. To that end, we limit the wait time to 100ms and
+ // fall back to the filter method in case of a timeout.
+ //
+ // +------------+------+--------------+
+ // | version | wait | filter level |
+ // +------------+------+--------------+
+ // | V1 | No | Second |
+ // | V2 | Yes | Nanosecond |
+ // | V2-slow-fs | No | Second |
+ // +------------+------+--------------+
+ let dirstate_v2 = repo.use_dirstate_v2();
+
+ // Let's ignore NFS right off the bat
+ let mut fast_enough_fs = !is_on_nfs_mount(repo.working_directory_path());
+ let fs_time_now = if dirstate_v2 && fast_enough_fs {
+ match wait_until_fs_tick(repo.working_directory_path()) {
+ None => None,
+ Some(Ok(time)) => Some(time),
+ Some(Err(time)) => {
+ fast_enough_fs = false;
+ Some(time)
+ }
+ }
+ } else {
+ filesystem_now(repo.working_directory_path())
+ .ok()
+ .map(TruncatedTimestamp::from)
+ };
+
+ let mut total = 0;
+ for (filename, mode, size, mtime) in files_receiver.into_iter() {
+ total += 1;
+ // When using dirstate-v2 on a filesystem with reasonable performance
+ // this is basically always true unless you get a mtime from the
+ // far future.
+ let has_meaningful_mtime = if let Some(fs_time) = fs_time_now {
+ mtime.for_reliable_mtime_of_self(&fs_time).is_some_and(|t| {
+ // Dirstate-v1 only has second-level information
+ !t.second_ambiguous || dirstate_v2 && fast_enough_fs
+ })
+ } else {
+ // We somehow failed to write to the filesystem, so don't store
+ // the cache information.
+ false
+ };
+ let reset = DirstateEntryReset {
+ filename,
+ wc_tracked: true,
+ p1_tracked: true,
+ p2_info: false,
+ has_meaningful_mtime,
+ parent_file_data_opt: Some(ParentFileData {
+ mode_size: Some((
+ mode,
+ size.try_into().expect("invalid file size in manifest"),
+ )),
+ mtime: Some(mtime),
+ }),
+ from_empty: true,
+ };
+ dirstate.reset_state(reset).map_err(|e| {
+ HgError::abort(e.to_string(), exit_codes::ABORT, None)
+ })?;
+ }
+
+ Ok(total)
+}
+
+/// Wait until the next update from the filesystem time by writing in a loop
+/// a new temporary file inside the working directory and checking if its time
+/// differs from the first one observed.
+///
+/// Returns `None` if we are unable to get the filesystem time,
+/// `Some(Err(timestamp))` if we've timed out waiting for the filesystem clock
+/// to tick, and `Some(Ok(timestamp))` if we've waited successfully.
+///
+/// On Linux, your average tick is going to be a "jiffy", or 1/HZ.
+/// HZ is your kernel's tick rate (if it has one configured) and the value
+/// is the one returned by `grep 'CONFIG_HZ=' /boot/config-$(uname -r)`,
+/// again assuming a normal setup.
+///
+/// In my case (Alphare) at the time of writing, I get `CONFIG_HZ=250`,
+/// which equates to 4ms.
+///
+/// This might change with a series that could make it to Linux 6.12:
+/// https://lore.kernel.org/all/20241002-mgtime-v10-8-d1c4717f5284@kernel.org
+fn wait_until_fs_tick(
+ working_directory_path: &Path,
+) -> Option<Result<TruncatedTimestamp, TruncatedTimestamp>> {
+ let start = std::time::Instant::now();
+ let old_fs_time = filesystem_now(working_directory_path).ok()?;
+ let mut fs_time = filesystem_now(working_directory_path).ok()?;
+
+ const FS_TICK_WAIT_TIMEOUT: Duration = Duration::from_millis(100);
+
+ while fs_time == old_fs_time {
+ if std::time::Instant::now() - start > FS_TICK_WAIT_TIMEOUT {
+ log::trace!(
+ "timed out waiting for the fs clock to tick after {:?}",
+ FS_TICK_WAIT_TIMEOUT
+ );
+ return Some(Err(TruncatedTimestamp::from(old_fs_time)));
+ }
+ fs_time = filesystem_now(working_directory_path).ok()?;
+ }
+ log::trace!(
+ "waited for {:?} before writing the dirstate",
+ fs_time.duration_since(old_fs_time)
+ );
+ Some(Ok(TruncatedTimestamp::from(fs_time)))
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn test_chunk_tracked_files() {
+ fn chunk(v: Vec<&'static str>) -> Vec<ExpandedManifestEntry> {
+ v.into_iter()
+ .map(|f| (HgPath::new(f.as_bytes()), NULL_NODE, None))
+ .collect()
+ }
+ let p = HgPath::new;
+
+ let files = chunk(vec!["a"]);
+ let expected = vec![(p(""), chunk(vec!["a"]))];
+ assert_eq!(chunk_tracked_files(files), expected);
+
+ let files = chunk(vec!["a", "b", "c"]);
+ let expected = vec![(p(""), chunk(vec!["a", "b", "c"]))];
+ assert_eq!(chunk_tracked_files(files), expected);
+
+ let files = chunk(vec![
+ "dir/a-new",
+ "dir/a/mut",
+ "dir/a/mut-mut",
+ "dir/albert",
+ "dir/b",
+ "dir/subdir/c",
+ "dir/subdir/d",
+ "file",
+ ]);
+ let expected = vec![
+ (p("dir"), chunk(vec!["dir/a-new"])),
+ (p("dir/a"), chunk(vec!["dir/a/mut", "dir/a/mut-mut"])),
+ (p("dir"), chunk(vec!["dir/albert", "dir/b"])),
+ (p("dir/subdir"), chunk(vec!["dir/subdir/c", "dir/subdir/d"])),
+ (p(""), chunk(vec!["file"])),
+ ];
+ assert_eq!(chunk_tracked_files(files), expected);
+
+ // Doesn't get split
+ let large_dir = vec![
+ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
+ "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
+ ];
+ let files = chunk(large_dir.clone());
+ let expected = vec![(p(""), chunk(large_dir))];
+ assert_eq!(chunk_tracked_files(files), expected);
+ }
+}
--- a/rust/hg-cpython/src/lib.rs Wed Oct 16 19:14:30 2024 +0200
+++ b/rust/hg-cpython/src/lib.rs Tue Oct 01 13:49:11 2024 +0200
@@ -45,6 +45,7 @@
pub mod exceptions;
mod pybytes_deref;
pub mod revlog;
+pub mod update;
pub mod utils;
/// Revision as exposed to/from the Python layer.
@@ -100,6 +101,7 @@
m.add(py, "discovery", discovery::init_module(py, &dotted_name)?)?;
m.add(py, "dirstate", dirstate::init_module(py, &dotted_name)?)?;
m.add(py, "revlog", revlog::init_module(py, &dotted_name)?)?;
+ m.add(py, "update", update::init_module(py, &dotted_name)?)?;
m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
Ok(())
});
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-cpython/src/update.rs Tue Oct 01 13:49:11 2024 +0200
@@ -0,0 +1,53 @@
+// update.rs
+//
+// Copyright 2024 Mercurial developers
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Module for updating a repository.
+use cpython::{PyDict, PyModule, PyObject, PyResult, Python};
+use hg::{
+ progress::{HgProgressBar, Progress},
+ update::update_from_null,
+ BaseRevision,
+};
+
+use crate::{
+ exceptions::FallbackError,
+ utils::{hgerror_to_pyerr, repo_from_path},
+};
+
+pub fn update_from_null_fast_path(
+ py: Python,
+ repo_path: PyObject,
+ to: BaseRevision,
+) -> PyResult<usize> {
+ log::trace!("Using update from null fastpath");
+ let repo = repo_from_path(py, repo_path)?;
+ let progress: &dyn Progress = &HgProgressBar::new("updating");
+ hgerror_to_pyerr(py, update_from_null(&repo, to.into(), progress))
+}
+
+pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
+ let dotted_name = &format!("{}.update", package);
+ let m = PyModule::new(py, dotted_name)?;
+
+ m.add(py, "__package__", package)?;
+ m.add(py, "__doc__", "Rust module for updating a repository")?;
+ m.add(py, "FallbackError", py.get_type::<FallbackError>())?;
+ m.add(
+ py,
+ "update_from_null",
+ py_fn!(
+ py,
+ update_from_null_fast_path(repo_path: PyObject, to: BaseRevision,)
+ ),
+ )?;
+
+ let sys = PyModule::import(py, "sys")?;
+ let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
+ sys_modules.set_item(py, dotted_name, &m)?;
+
+ Ok(m)
+}
--- a/tests/test-audit-path.t Wed Oct 16 19:14:30 2024 +0200
+++ b/tests/test-audit-path.t Tue Oct 01 13:49:11 2024 +0200
@@ -91,7 +91,8 @@
$ hg manifest -r0
.hg/test
$ hg update -Cr0
- abort: path contains illegal component: .hg/test
+ abort: path contains illegal component: .hg/test (no-rust !)
+ abort: path '.hg/test' is inside the '.hg' folder (rust !)
[10]
attack foo/.hg/test
@@ -107,6 +108,7 @@
$ hg manifest -r2
back
back/test
+
#if symlink
$ hg update -Cr2
abort: path 'back/test' traverses symbolic link 'back'
@@ -220,17 +222,30 @@
'a' and 'a/b' are taken as good paths. still applyupdates() should fail.
$ hg up -qC null
+#if rust
+ $ hg up 1
+ abort: path 'a/*' traverses symbolic link 'a'
+ [10]
+#endif
+
+#if no-rust
$ hg up 1
abort: path 'a/b' traverses symbolic link 'a'
[255]
+#endif
$ ls ../update-symlink-out
try branch update replacing directory with symlink, and its content: the
path 'a' is audited as a directory first, which should be audited again as
a symlink.
+#if rust
+ $ rm -rf a
+#else
$ rm -f a
+#endif
$ hg up -qC 2
+
$ hg up 1
abort: path 'a/b' traverses symbolic link 'a'
[255]
--- a/tests/test-clone.t Wed Oct 16 19:14:30 2024 +0200
+++ b/tests/test-clone.t Tue Oct 01 13:49:11 2024 +0200
@@ -1235,7 +1235,7 @@
#if linuxormacos no-fsmonitor
$ hg clone a nofsmonitor
updating to bookmark @ on branch stable
- (warning: large working directory being used without fsmonitor enabled; enable fsmonitor to improve performance; see "hg help -e fsmonitor")
+ (warning: large working directory being used without fsmonitor enabled; enable fsmonitor to improve performance; see "hg help -e fsmonitor") (no-rust !)
3 files updated, 0 files merged, 0 files removed, 0 files unresolved
#else
$ hg clone a nofsmonitor
@@ -1284,7 +1284,7 @@
#if linuxormacos no-fsmonitor
$ hg up cf0fe1914066
- (warning: large working directory being used without fsmonitor enabled; enable fsmonitor to improve performance; see "hg help -e fsmonitor")
+ (warning: large working directory being used without fsmonitor enabled; enable fsmonitor to improve performance; see "hg help -e fsmonitor") (no-rust !)
2 files updated, 0 files merged, 0 files removed, 0 files unresolved
#else
$ hg up cf0fe1914066
--- a/tests/test-simple-update.t Wed Oct 16 19:14:30 2024 +0200
+++ b/tests/test-simple-update.t Tue Oct 01 13:49:11 2024 +0200
@@ -96,11 +96,12 @@
#if no-windows
$ cat <<EOF > forceworker.py
- > from mercurial import extensions, worker
+ > from mercurial import extensions, merge, worker
> def nocost(orig, ui, costperop, nops, threadsafe=True):
> return worker._numworkers(ui) > 1
> def uisetup(ui):
> extensions.wrapfunction(worker, 'worthwhile', nocost)
+ > merge.MAYBE_USE_RUST_UPDATE = False
> EOF
$ hg init worker
--- a/tests/test-stream-bundle-v2.t Wed Oct 16 19:14:30 2024 +0200
+++ b/tests/test-stream-bundle-v2.t Tue Oct 01 13:49:11 2024 +0200
@@ -152,19 +152,19 @@
bundle2-input-bundle: 2 parts total
checking for updated bookmarks
updating to branch default
- resolving manifests
- branchmerge: False, force: False, partial: False
- ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041
- A: remote created -> g
- getting A
- B: remote created -> g
- getting B
- C: remote created -> g
- getting C
- D: remote created -> g
- getting D
- E: remote created -> g
- getting E
+ resolving manifests (no-rust !)
+ branchmerge: False, force: False, partial: False (no-rust !)
+ ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041 (no-rust !)
+ A: remote created -> g (no-rust !)
+ getting A (no-rust !)
+ B: remote created -> g (no-rust !)
+ getting B (no-rust !)
+ C: remote created -> g (no-rust !)
+ getting C (no-rust !)
+ D: remote created -> g (no-rust !)
+ getting D (no-rust !)
+ E: remote created -> g (no-rust !)
+ getting E (no-rust !)
5 files updated, 0 files merged, 0 files removed, 0 files unresolved
updating the branch cache
(sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
@@ -215,19 +215,19 @@
bundle2-input-bundle: 2 parts total
checking for updated bookmarks
updating to branch default
- resolving manifests
- branchmerge: False, force: False, partial: False
- ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041
- A: remote created -> g
- getting A
- B: remote created -> g
- getting B
- C: remote created -> g
- getting C
- D: remote created -> g
- getting D
- E: remote created -> g
- getting E
+ resolving manifests (no-rust !)
+ branchmerge: False, force: False, partial: False (no-rust !)
+ ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041 (no-rust !)
+ A: remote created -> g (no-rust !)
+ getting A (no-rust !)
+ B: remote created -> g (no-rust !)
+ getting B (no-rust !)
+ C: remote created -> g (no-rust !)
+ getting C (no-rust !)
+ D: remote created -> g (no-rust !)
+ getting D (no-rust !)
+ E: remote created -> g (no-rust !)
+ getting E (no-rust !)
5 files updated, 0 files merged, 0 files removed, 0 files unresolved
updating the branch cache
(sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
@@ -280,19 +280,19 @@
bundle2-input-bundle: 2 parts total
checking for updated bookmarks
updating to branch default
- resolving manifests
- branchmerge: False, force: False, partial: False
- ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041
- A: remote created -> g
- getting A
- B: remote created -> g
- getting B
- C: remote created -> g
- getting C
- D: remote created -> g
- getting D
- E: remote created -> g
- getting E
+ resolving manifests (no-rust !)
+ branchmerge: False, force: False, partial: False (no-rust !)
+ ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041 (no-rust !)
+ A: remote created -> g (no-rust !)
+ getting A (no-rust !)
+ B: remote created -> g (no-rust !)
+ getting B (no-rust !)
+ C: remote created -> g (no-rust !)
+ getting C (no-rust !)
+ D: remote created -> g (no-rust !)
+ getting D (no-rust !)
+ E: remote created -> g (no-rust !)
+ getting E (no-rust !)
5 files updated, 0 files merged, 0 files removed, 0 files unresolved
updating the branch cache
(sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
@@ -342,19 +342,19 @@
bundle2-input-bundle: 2 parts total
checking for updated bookmarks
updating to branch default
- resolving manifests
- branchmerge: False, force: False, partial: False
- ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041
- A: remote created -> g
- getting A
- B: remote created -> g
- getting B
- C: remote created -> g
- getting C
- D: remote created -> g
- getting D
- E: remote created -> g
- getting E
+ resolving manifests (no-rust !)
+ branchmerge: False, force: False, partial: False (no-rust !)
+ ancestor: 000000000000, local: 000000000000+, remote: 9bc730a19041 (no-rust !)
+ A: remote created -> g (no-rust !)
+ getting A (no-rust !)
+ B: remote created -> g (no-rust !)
+ getting B (no-rust !)
+ C: remote created -> g (no-rust !)
+ getting C (no-rust !)
+ D: remote created -> g (no-rust !)
+ getting D (no-rust !)
+ E: remote created -> g (no-rust !)
+ getting E (no-rust !)
5 files updated, 0 files merged, 0 files removed, 0 files unresolved
updating the branch cache
(sent 4 HTTP requests and * bytes; received * bytes in responses) (glob)
--- a/tests/test-subrepo-deep-nested-change.t Wed Oct 16 19:14:30 2024 +0200
+++ b/tests/test-subrepo-deep-nested-change.t Tue Oct 01 13:49:11 2024 +0200
@@ -45,9 +45,9 @@
linking [=======================================> ] 8/9\r (no-eol) (esc) (rust !)
linking [============================================>] 9/9\r (no-eol) (esc) (rust !)
\r (no-eol) (esc)
- \r (no-eol) (esc)
- updating [===========================================>] 1/1\r (no-eol) (esc)
- \r (no-eol) (esc)
+ \r (no-eol) (esc) (no-rust !)
+ updating [===========================================>] 1/1\r (no-eol) (esc) (no-rust !)
+ \r (no-eol) (esc) (no-rust !)
updating to branch default
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
$ hg add -R sub1
@@ -89,7 +89,6 @@
linking [==================================> ] 7/9\r (no-eol) (esc)
linking [=======================================> ] 8/9\r (no-eol) (esc)
linking [============================================>] 9/9\r (no-eol) (esc)
- updating [===========================================>] 1/1\r (no-eol) (esc)
\r (no-eol) (esc)
updating to branch default
cloning subrepo sub2 from $TESTTMP/sub2
--- a/tests/test-symlink-placeholder.t Wed Oct 16 19:14:30 2024 +0200
+++ b/tests/test-symlink-placeholder.t Tue Oct 01 13:49:11 2024 +0200
@@ -3,12 +3,13 @@
Create extension that can disable symlink support:
$ cat > nolink.py <<EOF
- > from mercurial import extensions, util
+ > from mercurial import extensions, merge, util
> def setflags(orig, f, l, x):
> pass
> def checklink(orig, path):
> return False
> def extsetup(ui):
+ > merge.MAYBE_USE_RUST_UPDATE = False
> extensions.wrapfunction(util, 'setflags', setflags)
> extensions.wrapfunction(util, 'checklink', checklink)
> EOF