rhg: Add support for `rhg status --copies`
Copy sources are collected during `status()` rather than after the fact like
in Python, because `status()` takes a `&mut` exclusive reference to the dirstate map
(in order to potentially mutate it for directory mtimes) and returns `Cow<'_, HgPath>`
that borrow the dirstate map.
Even though with `Cow` only some shared borrows remain, the still extend the same
lifetime of the initial `&mut` so the dirstate map cannot be borrowed again
to access copy sources after the fact:
https://doc.rust-lang.org/nomicon/lifetime-mismatch.html#limits-of-lifetimes
Additionally, collecting copy sources during the dirstate tree traversal that
`status()` already does avoids the cost of another traversal or other lookups
(though I haven’t benchmarked that cost).
Differential Revision: https://phab.mercurial-scm.org/D11899
--- a/rust/hg-core/src/dirstate/status.rs Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-core/src/dirstate/status.rs Fri Dec 10 16:18:58 2021 +0100
@@ -66,41 +66,43 @@
pub list_clean: bool,
pub list_unknown: bool,
pub list_ignored: bool,
+ /// Whether to populate `StatusPath::copy_source`
+ pub list_copies: bool,
/// Whether to collect traversed dirs for applying a callback later.
/// Used by `hg purge` for example.
pub collect_traversed_dirs: bool,
}
-#[derive(Debug, Default)]
+#[derive(Default)]
pub struct DirstateStatus<'a> {
/// The current time at the start of the `status()` algorithm, as measured
/// and possibly truncated by the filesystem.
pub filesystem_time_at_status_start: Option<std::time::SystemTime>,
/// Tracked files whose contents have changed since the parent revision
- pub modified: Vec<HgPathCow<'a>>,
+ pub modified: Vec<StatusPath<'a>>,
/// Newly-tracked files that were not present in the parent
- pub added: Vec<HgPathCow<'a>>,
+ pub added: Vec<StatusPath<'a>>,
/// Previously-tracked files that have been (re)moved with an hg command
- pub removed: Vec<HgPathCow<'a>>,
+ pub removed: Vec<StatusPath<'a>>,
/// (Still) tracked files that are missing, (re)moved with an non-hg
/// command
- pub deleted: Vec<HgPathCow<'a>>,
+ pub deleted: Vec<StatusPath<'a>>,
/// Tracked files that are up to date with the parent.
/// Only pupulated if `StatusOptions::list_clean` is true.
- pub clean: Vec<HgPathCow<'a>>,
+ pub clean: Vec<StatusPath<'a>>,
/// Files in the working directory that are ignored with `.hgignore`.
/// Only pupulated if `StatusOptions::list_ignored` is true.
- pub ignored: Vec<HgPathCow<'a>>,
+ pub ignored: Vec<StatusPath<'a>>,
/// Files in the working directory that are neither tracked nor ignored.
/// Only pupulated if `StatusOptions::list_unknown` is true.
- pub unknown: Vec<HgPathCow<'a>>,
+ pub unknown: Vec<StatusPath<'a>>,
/// Was explicitly matched but cannot be found/accessed
pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
@@ -108,7 +110,7 @@
/// Either clean or modified, but we can’t tell from filesystem metadata
/// alone. The file contents need to be read and compared with that in
/// the parent.
- pub unsure: Vec<HgPathCow<'a>>,
+ pub unsure: Vec<StatusPath<'a>>,
/// Only filled if `collect_traversed_dirs` is `true`
pub traversed: Vec<HgPathCow<'a>>,
@@ -118,6 +120,12 @@
pub dirty: bool,
}
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct StatusPath<'a> {
+ pub path: HgPathCow<'a>,
+ pub copy_source: Option<HgPathCow<'a>>,
+}
+
#[derive(Debug, derive_more::From)]
pub enum StatusError {
/// Generic IO error
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Dec 10 16:18:58 2021 +0100
@@ -309,6 +309,25 @@
NodeRef::OnDisk(node) => node.copy_source(on_disk),
}
}
+ /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
+ /// HgPath>` detached from `'tree`
+ pub(super) fn copy_source_borrowed(
+ &self,
+ on_disk: &'on_disk [u8],
+ ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
+ {
+ Ok(match self {
+ NodeRef::InMemory(_path, node) => {
+ node.copy_source.as_ref().map(|source| match source {
+ Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
+ Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
+ })
+ }
+ NodeRef::OnDisk(node) => node
+ .copy_source(on_disk)?
+ .map(|source| BorrowedPath::OnDisk(source)),
+ })
+ }
pub(super) fn entry(
&self,
--- a/rust/hg-core/src/dirstate_tree/status.rs Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/status.rs Fri Dec 10 16:18:58 2021 +0100
@@ -1,5 +1,6 @@
use crate::dirstate::entry::TruncatedTimestamp;
use crate::dirstate::status::IgnoreFnType;
+use crate::dirstate::status::StatusPath;
use crate::dirstate_tree::dirstate_map::BorrowedPath;
use crate::dirstate_tree::dirstate_map::ChildNodesRef;
use crate::dirstate_tree::dirstate_map::DirstateMap;
@@ -15,6 +16,7 @@
use crate::DirstateStatus;
use crate::EntryState;
use crate::HgPathBuf;
+use crate::HgPathCow;
use crate::PatternFileWarning;
use crate::StatusError;
use crate::StatusOptions;
@@ -146,7 +148,65 @@
filesystem_time_at_status_start: Option<SystemTime>,
}
+enum Outcome {
+ Modified,
+ Added,
+ Removed,
+ Deleted,
+ Clean,
+ Ignored,
+ Unknown,
+ Unsure,
+}
+
impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
+ fn push_outcome(
+ &self,
+ which: Outcome,
+ dirstate_node: &NodeRef<'tree, 'on_disk>,
+ ) -> Result<(), DirstateV2ParseError> {
+ let path = dirstate_node
+ .full_path_borrowed(self.dmap.on_disk)?
+ .detach_from_tree();
+ let copy_source = if self.options.list_copies {
+ dirstate_node
+ .copy_source_borrowed(self.dmap.on_disk)?
+ .map(|source| source.detach_from_tree())
+ } else {
+ None
+ };
+ self.push_outcome_common(which, path, copy_source);
+ Ok(())
+ }
+
+ fn push_outcome_without_copy_source(
+ &self,
+ which: Outcome,
+ path: &BorrowedPath<'_, 'on_disk>,
+ ) {
+ self.push_outcome_common(which, path.detach_from_tree(), None)
+ }
+
+ fn push_outcome_common(
+ &self,
+ which: Outcome,
+ path: HgPathCow<'on_disk>,
+ copy_source: Option<HgPathCow<'on_disk>>,
+ ) {
+ let mut outcome = self.outcome.lock().unwrap();
+ let vec = match which {
+ Outcome::Modified => &mut outcome.modified,
+ Outcome::Added => &mut outcome.added,
+ Outcome::Removed => &mut outcome.removed,
+ Outcome::Deleted => &mut outcome.deleted,
+ Outcome::Clean => &mut outcome.clean,
+ Outcome::Ignored => &mut outcome.ignored,
+ Outcome::Unknown => &mut outcome.unknown,
+ Outcome::Unsure => &mut outcome.unsure,
+ };
+ vec.push(StatusPath { path, copy_source });
+ }
+
fn read_dir(
&self,
hg_path: &HgPath,
@@ -347,10 +407,7 @@
// If we previously had a file here, it was removed (with
// `hg rm` or similar) or deleted before it could be
// replaced by a directory or something else.
- self.mark_removed_or_deleted_if_file(
- &hg_path,
- dirstate_node.state()?,
- );
+ self.mark_removed_or_deleted_if_file(&dirstate_node)?;
}
if file_type.is_dir() {
if self.options.collect_traversed_dirs {
@@ -381,24 +438,13 @@
if file_or_symlink && self.matcher.matches(hg_path) {
if let Some(state) = dirstate_node.state()? {
match state {
- EntryState::Added => self
- .outcome
- .lock()
- .unwrap()
- .added
- .push(hg_path.detach_from_tree()),
+ EntryState::Added => {
+ self.push_outcome(Outcome::Added, &dirstate_node)?
+ }
EntryState::Removed => self
- .outcome
- .lock()
- .unwrap()
- .removed
- .push(hg_path.detach_from_tree()),
+ .push_outcome(Outcome::Removed, &dirstate_node)?,
EntryState::Merged => self
- .outcome
- .lock()
- .unwrap()
- .modified
- .push(hg_path.detach_from_tree()),
+ .push_outcome(Outcome::Modified, &dirstate_node)?,
EntryState::Normal => self
.handle_normal_file(&dirstate_node, fs_metadata)?,
}
@@ -510,7 +556,6 @@
let entry = dirstate_node
.entry()?
.expect("handle_normal_file called with entry-less node");
- let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
let mode_changed =
|| self.options.check_exec && entry.mode_changed(fs_metadata);
let size = entry.size();
@@ -518,20 +563,12 @@
if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
// issue6456: Size returned may be longer due to encryption
// on EXT-4 fscrypt. TODO maybe only do it on EXT4?
- self.outcome
- .lock()
- .unwrap()
- .unsure
- .push(hg_path.detach_from_tree())
+ self.push_outcome(Outcome::Unsure, dirstate_node)?
} else if dirstate_node.has_copy_source()
|| entry.is_from_other_parent()
|| (size >= 0 && (size_changed || mode_changed()))
{
- self.outcome
- .lock()
- .unwrap()
- .modified
- .push(hg_path.detach_from_tree())
+ self.push_outcome(Outcome::Modified, dirstate_node)?
} else {
let mtime_looks_clean;
if let Some(dirstate_mtime) = entry.truncated_mtime() {
@@ -548,17 +585,9 @@
mtime_looks_clean = false
};
if !mtime_looks_clean {
- self.outcome
- .lock()
- .unwrap()
- .unsure
- .push(hg_path.detach_from_tree())
+ self.push_outcome(Outcome::Unsure, dirstate_node)?
} else if self.options.list_clean {
- self.outcome
- .lock()
- .unwrap()
- .clean
- .push(hg_path.detach_from_tree())
+ self.push_outcome(Outcome::Clean, dirstate_node)?
}
}
Ok(())
@@ -570,10 +599,7 @@
dirstate_node: NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
self.check_for_outdated_directory_cache(&dirstate_node)?;
- self.mark_removed_or_deleted_if_file(
- &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
- dirstate_node.state()?,
- );
+ self.mark_removed_or_deleted_if_file(&dirstate_node)?;
dirstate_node
.children(self.dmap.on_disk)?
.par_iter()
@@ -587,26 +613,19 @@
/// Does nothing on a "directory" node
fn mark_removed_or_deleted_if_file(
&self,
- hg_path: &BorrowedPath<'tree, 'on_disk>,
- dirstate_node_state: Option<EntryState>,
- ) {
- if let Some(state) = dirstate_node_state {
- if self.matcher.matches(hg_path) {
+ dirstate_node: &NodeRef<'tree, 'on_disk>,
+ ) -> Result<(), DirstateV2ParseError> {
+ if let Some(state) = dirstate_node.state()? {
+ let path = dirstate_node.full_path(self.dmap.on_disk)?;
+ if self.matcher.matches(path) {
if let EntryState::Removed = state {
- self.outcome
- .lock()
- .unwrap()
- .removed
- .push(hg_path.detach_from_tree())
+ self.push_outcome(Outcome::Removed, dirstate_node)?
} else {
- self.outcome
- .lock()
- .unwrap()
- .deleted
- .push(hg_path.detach_from_tree())
+ self.push_outcome(Outcome::Deleted, &dirstate_node)?
}
}
}
+ Ok(())
}
/// Something in the filesystem has no corresponding dirstate node
@@ -684,19 +703,17 @@
let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
if is_ignored {
if self.options.list_ignored {
- self.outcome
- .lock()
- .unwrap()
- .ignored
- .push(hg_path.detach_from_tree())
+ self.push_outcome_without_copy_source(
+ Outcome::Ignored,
+ hg_path,
+ )
}
} else {
if self.options.list_unknown {
- self.outcome
- .lock()
- .unwrap()
- .unknown
- .push(hg_path.detach_from_tree())
+ self.push_outcome_without_copy_source(
+ Outcome::Unknown,
+ hg_path,
+ )
}
}
is_ignored
--- a/rust/hg-cpython/src/dirstate/status.rs Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-cpython/src/dirstate/status.rs Fri Dec 10 16:18:58 2021 +0100
@@ -15,6 +15,7 @@
exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
PyResult, PyTuple, Python, PythonObject, ToPyObject,
};
+use hg::dirstate::status::StatusPath;
use hg::{
matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
parse_pattern_syntax,
@@ -27,15 +28,19 @@
};
use std::borrow::Borrow;
+fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList {
+ collect_pybytes_list(py, paths.iter().map(|item| &*item.path))
+}
+
/// This will be useless once trait impls for collection are added to `PyBytes`
/// upstream.
fn collect_pybytes_list(
py: Python,
- collection: &[impl AsRef<HgPath>],
+ iter: impl Iterator<Item = impl AsRef<HgPath>>,
) -> PyList {
let list = PyList::new(py, &[]);
- for path in collection.iter() {
+ for path in iter {
list.append(
py,
PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
@@ -121,6 +126,8 @@
})
.collect();
let ignore_files = ignore_files?;
+ // The caller may call `copymap.items()` separately
+ let list_copies = false;
match matcher.get_type(py).name(py).borrow() {
"alwaysmatcher" => {
@@ -135,6 +142,7 @@
list_clean,
list_ignored,
list_unknown,
+ list_copies,
collect_traversed_dirs,
},
)
@@ -171,6 +179,7 @@
list_clean,
list_ignored,
list_unknown,
+ list_copies,
collect_traversed_dirs,
},
)
@@ -222,6 +231,7 @@
list_clean,
list_ignored,
list_unknown,
+ list_copies,
collect_traversed_dirs,
},
)
@@ -241,16 +251,16 @@
status_res: DirstateStatus,
warnings: Vec<PatternFileWarning>,
) -> PyResult<PyTuple> {
- let modified = collect_pybytes_list(py, status_res.modified.as_ref());
- let added = collect_pybytes_list(py, status_res.added.as_ref());
- let removed = collect_pybytes_list(py, status_res.removed.as_ref());
- let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
- let clean = collect_pybytes_list(py, status_res.clean.as_ref());
- let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
- let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
- let unsure = collect_pybytes_list(py, status_res.unsure.as_ref());
- let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
- let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
+ let modified = collect_status_path_list(py, &status_res.modified);
+ let added = collect_status_path_list(py, &status_res.added);
+ let removed = collect_status_path_list(py, &status_res.removed);
+ let deleted = collect_status_path_list(py, &status_res.deleted);
+ let clean = collect_status_path_list(py, &status_res.clean);
+ let ignored = collect_status_path_list(py, &status_res.ignored);
+ let unknown = collect_status_path_list(py, &status_res.unknown);
+ let unsure = collect_status_path_list(py, &status_res.unsure);
+ let bad = collect_bad_matches(py, &status_res.bad)?;
+ let traversed = collect_pybytes_list(py, status_res.traversed.iter());
let dirty = status_res.dirty.to_py_object(py);
let py_warnings = PyList::new(py, &[]);
for warning in warnings.iter() {
--- a/rust/rhg/src/commands/status.rs Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/rhg/src/commands/status.rs Fri Dec 10 16:18:58 2021 +0100
@@ -13,6 +13,7 @@
use hg;
use hg::config::Config;
use hg::dirstate::has_exec_bit;
+use hg::dirstate::status::StatusPath;
use hg::dirstate::TruncatedTimestamp;
use hg::dirstate::RANGE_MASK_31BIT;
use hg::errors::{HgError, IoResultExt};
@@ -23,7 +24,7 @@
use hg::utils::files::get_bytes_from_os_string;
use hg::utils::files::get_path_from_bytes;
use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
-use hg::{HgPathCow, StatusOptions};
+use hg::StatusOptions;
use log::{info, warn};
use std::io;
use std::path::PathBuf;
@@ -89,6 +90,12 @@
.long("--ignored"),
)
.arg(
+ Arg::with_name("copies")
+ .help("show source of copied files (DEFAULT: ui.statuscopies)")
+ .short("-C")
+ .long("--copies"),
+ )
+ .arg(
Arg::with_name("no-status")
.help("hide status prefix")
.short("-n")
@@ -174,7 +181,8 @@
let ui = invocation.ui;
let config = invocation.config;
let args = invocation.subcommand_args;
- let display_states = if args.is_present("all") {
+ let all = args.is_present("all");
+ let display_states = if all {
// TODO when implementing `--quiet`: it excludes clean files
// from `--all`
ALL_DISPLAY_STATES
@@ -195,6 +203,9 @@
}
};
let no_status = args.is_present("no-status");
+ let list_copies = all
+ || args.is_present("copies")
+ || config.get_bool(b"ui", b"statuscopies")?;
let repo = invocation.repo?;
@@ -213,6 +224,7 @@
list_clean: display_states.clean,
list_unknown: display_states.unknown,
list_ignored: display_states.ignored,
+ list_copies,
collect_traversed_dirs: false,
};
let (mut ds_status, pattern_warnings) = dmap.status(
@@ -231,7 +243,7 @@
if !ds_status.unsure.is_empty() {
info!(
"Files to be rechecked by retrieval from filelog: {:?}",
- &ds_status.unsure
+ ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
);
}
let mut fixup = Vec::new();
@@ -243,7 +255,7 @@
CommandError::from((e, &*format!("{:x}", p1.short())))
})?;
for to_check in ds_status.unsure {
- if unsure_is_modified(repo, &manifest, &to_check)? {
+ if unsure_is_modified(repo, &manifest, &to_check.path)? {
if display_states.modified {
ds_status.modified.push(to_check);
}
@@ -251,7 +263,7 @@
if display_states.clean {
ds_status.clean.push(to_check.clone());
}
- fixup.push(to_check.into_owned())
+ fixup.push(to_check.path.into_owned())
}
}
}
@@ -392,10 +404,10 @@
fn display(
&self,
status_prefix: &[u8],
- mut paths: Vec<HgPathCow>,
+ mut paths: Vec<StatusPath<'_>>,
) -> Result<(), CommandError> {
paths.sort_unstable();
- for path in paths {
+ for StatusPath { path, copy_source } in paths {
let relative;
let path = if let Some(relativize) = &self.relativize {
relative = relativize.relativize(&path);
@@ -414,6 +426,12 @@
path
))?
}
+ if let Some(source) = copy_source {
+ self.ui.write_stdout(&format_bytes!(
+ b" {}\n",
+ source.as_bytes()
+ ))?
+ }
}
Ok(())
}
--- a/tests/test-rename-dir-merge.t Fri Dec 10 16:57:39 2021 +0100
+++ b/tests/test-rename-dir-merge.t Fri Dec 10 16:18:58 2021 +0100
@@ -1,7 +1,3 @@
-TODO: fix rhg bugs that make this test fail when status is enabled
- $ unset RHG_STATUS
-
-
$ hg init t
$ cd t
--- a/tests/test-status.t Fri Dec 10 16:57:39 2021 +0100
+++ b/tests/test-status.t Fri Dec 10 16:18:58 2021 +0100
@@ -9,10 +9,6 @@
> EOF
#endif
-TODO: fix rhg bugs that make this test fail when status is enabled
- $ unset RHG_STATUS
-
-
$ hg init repo1
$ cd repo1
$ mkdir a b a/1 b/1 b/2
@@ -223,7 +219,7 @@
? unknown
hg status -n:
- $ env RHG_STATUS=1 RHG_ON_UNSUPPORTED=abort hg status -n
+ $ env RHG_ON_UNSUPPORTED=abort hg status -n
added
removed
deleted