Mercurial > hg
changeset 48454:473af5cbc209
rhg: Add support for `rhg status --copies`
Copy sources are collected during `status()` rather than after the fact like
in Python, because `status()` takes a `&mut` exclusive reference to the dirstate map
(in order to potentially mutate it for directory mtimes) and returns `Cow<'_, HgPath>`
values that borrow the dirstate map.
Even though with `Cow` only some shared borrows remain, they still extend the
lifetime of the initial `&mut`, so the dirstate map cannot be borrowed again
to access copy sources after the fact:
https://doc.rust-lang.org/nomicon/lifetime-mismatch.html#limits-of-lifetimes
Additionally, collecting copy sources during the dirstate tree traversal that
`status()` already does avoids the cost of another traversal or other lookups
(though I haven’t benchmarked that cost).
Differential Revision: https://phab.mercurial-scm.org/D11899
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Fri, 10 Dec 2021 16:18:58 +0100 |
parents | 9b0e1f64656f |
children | 04d756b7c0b5 |
files | rust/hg-core/src/dirstate/status.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/status.rs rust/hg-cpython/src/dirstate/status.rs rust/rhg/src/commands/status.rs tests/test-rename-dir-merge.t tests/test-status.t |
diffstat | 7 files changed, 171 insertions(+), 107 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs Fri Dec 10 16:57:39 2021 +0100 +++ b/rust/hg-core/src/dirstate/status.rs Fri Dec 10 16:18:58 2021 +0100 @@ -66,41 +66,43 @@ pub list_clean: bool, pub list_unknown: bool, pub list_ignored: bool, + /// Whether to populate `StatusPath::copy_source` + pub list_copies: bool, /// Whether to collect traversed dirs for applying a callback later. /// Used by `hg purge` for example. pub collect_traversed_dirs: bool, } -#[derive(Debug, Default)] +#[derive(Default)] pub struct DirstateStatus<'a> { /// The current time at the start of the `status()` algorithm, as measured /// and possibly truncated by the filesystem. pub filesystem_time_at_status_start: Option<std::time::SystemTime>, /// Tracked files whose contents have changed since the parent revision - pub modified: Vec<HgPathCow<'a>>, + pub modified: Vec<StatusPath<'a>>, /// Newly-tracked files that were not present in the parent - pub added: Vec<HgPathCow<'a>>, + pub added: Vec<StatusPath<'a>>, /// Previously-tracked files that have been (re)moved with an hg command - pub removed: Vec<HgPathCow<'a>>, + pub removed: Vec<StatusPath<'a>>, /// (Still) tracked files that are missing, (re)moved with an non-hg /// command - pub deleted: Vec<HgPathCow<'a>>, + pub deleted: Vec<StatusPath<'a>>, /// Tracked files that are up to date with the parent. /// Only pupulated if `StatusOptions::list_clean` is true. - pub clean: Vec<HgPathCow<'a>>, + pub clean: Vec<StatusPath<'a>>, /// Files in the working directory that are ignored with `.hgignore`. /// Only pupulated if `StatusOptions::list_ignored` is true. - pub ignored: Vec<HgPathCow<'a>>, + pub ignored: Vec<StatusPath<'a>>, /// Files in the working directory that are neither tracked nor ignored. /// Only pupulated if `StatusOptions::list_unknown` is true. 
- pub unknown: Vec<HgPathCow<'a>>, + pub unknown: Vec<StatusPath<'a>>, /// Was explicitly matched but cannot be found/accessed pub bad: Vec<(HgPathCow<'a>, BadMatch)>, @@ -108,7 +110,7 @@ /// Either clean or modified, but we can’t tell from filesystem metadata /// alone. The file contents need to be read and compared with that in /// the parent. - pub unsure: Vec<HgPathCow<'a>>, + pub unsure: Vec<StatusPath<'a>>, /// Only filled if `collect_traversed_dirs` is `true` pub traversed: Vec<HgPathCow<'a>>, @@ -118,6 +120,12 @@ pub dirty: bool, } +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct StatusPath<'a> { + pub path: HgPathCow<'a>, + pub copy_source: Option<HgPathCow<'a>>, +} + #[derive(Debug, derive_more::From)] pub enum StatusError { /// Generic IO error
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Dec 10 16:57:39 2021 +0100 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Dec 10 16:18:58 2021 +0100 @@ -309,6 +309,25 @@ NodeRef::OnDisk(node) => node.copy_source(on_disk), } } + /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk, + /// HgPath>` detached from `'tree` + pub(super) fn copy_source_borrowed( + &self, + on_disk: &'on_disk [u8], + ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError> + { + Ok(match self { + NodeRef::InMemory(_path, node) => { + node.copy_source.as_ref().map(|source| match source { + Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk), + Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory), + }) + } + NodeRef::OnDisk(node) => node + .copy_source(on_disk)? + .map(|source| BorrowedPath::OnDisk(source)), + }) + } pub(super) fn entry( &self,
--- a/rust/hg-core/src/dirstate_tree/status.rs Fri Dec 10 16:57:39 2021 +0100 +++ b/rust/hg-core/src/dirstate_tree/status.rs Fri Dec 10 16:18:58 2021 +0100 @@ -1,5 +1,6 @@ use crate::dirstate::entry::TruncatedTimestamp; use crate::dirstate::status::IgnoreFnType; +use crate::dirstate::status::StatusPath; use crate::dirstate_tree::dirstate_map::BorrowedPath; use crate::dirstate_tree::dirstate_map::ChildNodesRef; use crate::dirstate_tree::dirstate_map::DirstateMap; @@ -15,6 +16,7 @@ use crate::DirstateStatus; use crate::EntryState; use crate::HgPathBuf; +use crate::HgPathCow; use crate::PatternFileWarning; use crate::StatusError; use crate::StatusOptions; @@ -146,7 +148,65 @@ filesystem_time_at_status_start: Option<SystemTime>, } +enum Outcome { + Modified, + Added, + Removed, + Deleted, + Clean, + Ignored, + Unknown, + Unsure, +} + impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { + fn push_outcome( + &self, + which: Outcome, + dirstate_node: &NodeRef<'tree, 'on_disk>, + ) -> Result<(), DirstateV2ParseError> { + let path = dirstate_node + .full_path_borrowed(self.dmap.on_disk)? + .detach_from_tree(); + let copy_source = if self.options.list_copies { + dirstate_node + .copy_source_borrowed(self.dmap.on_disk)? 
+ .map(|source| source.detach_from_tree()) + } else { + None + }; + self.push_outcome_common(which, path, copy_source); + Ok(()) + } + + fn push_outcome_without_copy_source( + &self, + which: Outcome, + path: &BorrowedPath<'_, 'on_disk>, + ) { + self.push_outcome_common(which, path.detach_from_tree(), None) + } + + fn push_outcome_common( + &self, + which: Outcome, + path: HgPathCow<'on_disk>, + copy_source: Option<HgPathCow<'on_disk>>, + ) { + let mut outcome = self.outcome.lock().unwrap(); + let vec = match which { + Outcome::Modified => &mut outcome.modified, + Outcome::Added => &mut outcome.added, + Outcome::Removed => &mut outcome.removed, + Outcome::Deleted => &mut outcome.deleted, + Outcome::Clean => &mut outcome.clean, + Outcome::Ignored => &mut outcome.ignored, + Outcome::Unknown => &mut outcome.unknown, + Outcome::Unsure => &mut outcome.unsure, + }; + vec.push(StatusPath { path, copy_source }); + } + fn read_dir( &self, hg_path: &HgPath, @@ -347,10 +407,7 @@ // If we previously had a file here, it was removed (with // `hg rm` or similar) or deleted before it could be // replaced by a directory or something else. - self.mark_removed_or_deleted_if_file( - &hg_path, - dirstate_node.state()?, - ); + self.mark_removed_or_deleted_if_file(&dirstate_node)?; } if file_type.is_dir() { if self.options.collect_traversed_dirs { @@ -381,24 +438,13 @@ if file_or_symlink && self.matcher.matches(hg_path) { if let Some(state) = dirstate_node.state()? { match state { - EntryState::Added => self - .outcome - .lock() - .unwrap() - .added - .push(hg_path.detach_from_tree()), + EntryState::Added => { + self.push_outcome(Outcome::Added, &dirstate_node)? 
+ } EntryState::Removed => self - .outcome - .lock() - .unwrap() - .removed - .push(hg_path.detach_from_tree()), + .push_outcome(Outcome::Removed, &dirstate_node)?, EntryState::Merged => self - .outcome - .lock() - .unwrap() - .modified - .push(hg_path.detach_from_tree()), + .push_outcome(Outcome::Modified, &dirstate_node)?, EntryState::Normal => self .handle_normal_file(&dirstate_node, fs_metadata)?, } @@ -510,7 +556,6 @@ let entry = dirstate_node .entry()? .expect("handle_normal_file called with entry-less node"); - let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?; let mode_changed = || self.options.check_exec && entry.mode_changed(fs_metadata); let size = entry.size(); @@ -518,20 +563,12 @@ if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() { // issue6456: Size returned may be longer due to encryption // on EXT-4 fscrypt. TODO maybe only do it on EXT4? - self.outcome - .lock() - .unwrap() - .unsure - .push(hg_path.detach_from_tree()) + self.push_outcome(Outcome::Unsure, dirstate_node)? } else if dirstate_node.has_copy_source() || entry.is_from_other_parent() || (size >= 0 && (size_changed || mode_changed())) { - self.outcome - .lock() - .unwrap() - .modified - .push(hg_path.detach_from_tree()) + self.push_outcome(Outcome::Modified, dirstate_node)? } else { let mtime_looks_clean; if let Some(dirstate_mtime) = entry.truncated_mtime() { @@ -548,17 +585,9 @@ mtime_looks_clean = false }; if !mtime_looks_clean { - self.outcome - .lock() - .unwrap() - .unsure - .push(hg_path.detach_from_tree()) + self.push_outcome(Outcome::Unsure, dirstate_node)? } else if self.options.list_clean { - self.outcome - .lock() - .unwrap() - .clean - .push(hg_path.detach_from_tree()) + self.push_outcome(Outcome::Clean, dirstate_node)? 
} } Ok(()) @@ -570,10 +599,7 @@ dirstate_node: NodeRef<'tree, 'on_disk>, ) -> Result<(), DirstateV2ParseError> { self.check_for_outdated_directory_cache(&dirstate_node)?; - self.mark_removed_or_deleted_if_file( - &dirstate_node.full_path_borrowed(self.dmap.on_disk)?, - dirstate_node.state()?, - ); + self.mark_removed_or_deleted_if_file(&dirstate_node)?; dirstate_node .children(self.dmap.on_disk)? .par_iter() @@ -587,26 +613,19 @@ /// Does nothing on a "directory" node fn mark_removed_or_deleted_if_file( &self, - hg_path: &BorrowedPath<'tree, 'on_disk>, - dirstate_node_state: Option<EntryState>, - ) { - if let Some(state) = dirstate_node_state { - if self.matcher.matches(hg_path) { + dirstate_node: &NodeRef<'tree, 'on_disk>, + ) -> Result<(), DirstateV2ParseError> { + if let Some(state) = dirstate_node.state()? { + let path = dirstate_node.full_path(self.dmap.on_disk)?; + if self.matcher.matches(path) { if let EntryState::Removed = state { - self.outcome - .lock() - .unwrap() - .removed - .push(hg_path.detach_from_tree()) + self.push_outcome(Outcome::Removed, dirstate_node)? } else { - self.outcome - .lock() - .unwrap() - .deleted - .push(hg_path.detach_from_tree()) + self.push_outcome(Outcome::Deleted, &dirstate_node)? } } } + Ok(()) } /// Something in the filesystem has no corresponding dirstate node @@ -684,19 +703,17 @@ let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); if is_ignored { if self.options.list_ignored { - self.outcome - .lock() - .unwrap() - .ignored - .push(hg_path.detach_from_tree()) + self.push_outcome_without_copy_source( + Outcome::Ignored, + hg_path, + ) } } else { if self.options.list_unknown { - self.outcome - .lock() - .unwrap() - .unknown - .push(hg_path.detach_from_tree()) + self.push_outcome_without_copy_source( + Outcome::Unknown, + hg_path, + ) } } is_ignored
--- a/rust/hg-cpython/src/dirstate/status.rs Fri Dec 10 16:57:39 2021 +0100 +++ b/rust/hg-cpython/src/dirstate/status.rs Fri Dec 10 16:18:58 2021 +0100 @@ -15,6 +15,7 @@ exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject, PyResult, PyTuple, Python, PythonObject, ToPyObject, }; +use hg::dirstate::status::StatusPath; use hg::{ matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher}, parse_pattern_syntax, @@ -27,15 +28,19 @@ }; use std::borrow::Borrow; +fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList { + collect_pybytes_list(py, paths.iter().map(|item| &*item.path)) +} + /// This will be useless once trait impls for collection are added to `PyBytes` /// upstream. fn collect_pybytes_list( py: Python, - collection: &[impl AsRef<HgPath>], + iter: impl Iterator<Item = impl AsRef<HgPath>>, ) -> PyList { let list = PyList::new(py, &[]); - for path in collection.iter() { + for path in iter { list.append( py, PyBytes::new(py, path.as_ref().as_bytes()).into_object(), @@ -121,6 +126,8 @@ }) .collect(); let ignore_files = ignore_files?; + // The caller may call `copymap.items()` separately + let list_copies = false; match matcher.get_type(py).name(py).borrow() { "alwaysmatcher" => { @@ -135,6 +142,7 @@ list_clean, list_ignored, list_unknown, + list_copies, collect_traversed_dirs, }, ) @@ -171,6 +179,7 @@ list_clean, list_ignored, list_unknown, + list_copies, collect_traversed_dirs, }, ) @@ -222,6 +231,7 @@ list_clean, list_ignored, list_unknown, + list_copies, collect_traversed_dirs, }, ) @@ -241,16 +251,16 @@ status_res: DirstateStatus, warnings: Vec<PatternFileWarning>, ) -> PyResult<PyTuple> { - let modified = collect_pybytes_list(py, status_res.modified.as_ref()); - let added = collect_pybytes_list(py, status_res.added.as_ref()); - let removed = collect_pybytes_list(py, status_res.removed.as_ref()); - let deleted = collect_pybytes_list(py, status_res.deleted.as_ref()); - let clean = collect_pybytes_list(py, 
status_res.clean.as_ref()); - let ignored = collect_pybytes_list(py, status_res.ignored.as_ref()); - let unknown = collect_pybytes_list(py, status_res.unknown.as_ref()); - let unsure = collect_pybytes_list(py, status_res.unsure.as_ref()); - let bad = collect_bad_matches(py, status_res.bad.as_ref())?; - let traversed = collect_pybytes_list(py, status_res.traversed.as_ref()); + let modified = collect_status_path_list(py, &status_res.modified); + let added = collect_status_path_list(py, &status_res.added); + let removed = collect_status_path_list(py, &status_res.removed); + let deleted = collect_status_path_list(py, &status_res.deleted); + let clean = collect_status_path_list(py, &status_res.clean); + let ignored = collect_status_path_list(py, &status_res.ignored); + let unknown = collect_status_path_list(py, &status_res.unknown); + let unsure = collect_status_path_list(py, &status_res.unsure); + let bad = collect_bad_matches(py, &status_res.bad)?; + let traversed = collect_pybytes_list(py, status_res.traversed.iter()); let dirty = status_res.dirty.to_py_object(py); let py_warnings = PyList::new(py, &[]); for warning in warnings.iter() {
--- a/rust/rhg/src/commands/status.rs Fri Dec 10 16:57:39 2021 +0100 +++ b/rust/rhg/src/commands/status.rs Fri Dec 10 16:18:58 2021 +0100 @@ -13,6 +13,7 @@ use hg; use hg::config::Config; use hg::dirstate::has_exec_bit; +use hg::dirstate::status::StatusPath; use hg::dirstate::TruncatedTimestamp; use hg::dirstate::RANGE_MASK_31BIT; use hg::errors::{HgError, IoResultExt}; @@ -23,7 +24,7 @@ use hg::utils::files::get_bytes_from_os_string; use hg::utils::files::get_path_from_bytes; use hg::utils::hg_path::{hg_path_to_path_buf, HgPath}; -use hg::{HgPathCow, StatusOptions}; +use hg::StatusOptions; use log::{info, warn}; use std::io; use std::path::PathBuf; @@ -89,6 +90,12 @@ .long("--ignored"), ) .arg( + Arg::with_name("copies") + .help("show source of copied files (DEFAULT: ui.statuscopies)") + .short("-C") + .long("--copies"), + ) + .arg( Arg::with_name("no-status") .help("hide status prefix") .short("-n") @@ -174,7 +181,8 @@ let ui = invocation.ui; let config = invocation.config; let args = invocation.subcommand_args; - let display_states = if args.is_present("all") { + let all = args.is_present("all"); + let display_states = if all { // TODO when implementing `--quiet`: it excludes clean files // from `--all` ALL_DISPLAY_STATES @@ -195,6 +203,9 @@ } }; let no_status = args.is_present("no-status"); + let list_copies = all + || args.is_present("copies") + || config.get_bool(b"ui", b"statuscopies")?; let repo = invocation.repo?; @@ -213,6 +224,7 @@ list_clean: display_states.clean, list_unknown: display_states.unknown, list_ignored: display_states.ignored, + list_copies, collect_traversed_dirs: false, }; let (mut ds_status, pattern_warnings) = dmap.status( @@ -231,7 +243,7 @@ if !ds_status.unsure.is_empty() { info!( "Files to be rechecked by retrieval from filelog: {:?}", - &ds_status.unsure + ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>() ); } let mut fixup = Vec::new(); @@ -243,7 +255,7 @@ CommandError::from((e, &*format!("{:x}", p1.short()))) })?; for 
to_check in ds_status.unsure { - if unsure_is_modified(repo, &manifest, &to_check)? { + if unsure_is_modified(repo, &manifest, &to_check.path)? { if display_states.modified { ds_status.modified.push(to_check); } @@ -251,7 +263,7 @@ if display_states.clean { ds_status.clean.push(to_check.clone()); } - fixup.push(to_check.into_owned()) + fixup.push(to_check.path.into_owned()) } } } @@ -392,10 +404,10 @@ fn display( &self, status_prefix: &[u8], - mut paths: Vec<HgPathCow>, + mut paths: Vec<StatusPath<'_>>, ) -> Result<(), CommandError> { paths.sort_unstable(); - for path in paths { + for StatusPath { path, copy_source } in paths { let relative; let path = if let Some(relativize) = &self.relativize { relative = relativize.relativize(&path); @@ -414,6 +426,12 @@ path ))? } + if let Some(source) = copy_source { + self.ui.write_stdout(&format_bytes!( + b" {}\n", + source.as_bytes() + ))? + } } Ok(()) }
--- a/tests/test-rename-dir-merge.t Fri Dec 10 16:57:39 2021 +0100 +++ b/tests/test-rename-dir-merge.t Fri Dec 10 16:18:58 2021 +0100 @@ -1,7 +1,3 @@ -TODO: fix rhg bugs that make this test fail when status is enabled - $ unset RHG_STATUS - - $ hg init t $ cd t
--- a/tests/test-status.t Fri Dec 10 16:57:39 2021 +0100 +++ b/tests/test-status.t Fri Dec 10 16:18:58 2021 +0100 @@ -9,10 +9,6 @@ > EOF #endif -TODO: fix rhg bugs that make this test fail when status is enabled - $ unset RHG_STATUS - - $ hg init repo1 $ cd repo1 $ mkdir a b a/1 b/1 b/2 @@ -223,7 +219,7 @@ ? unknown hg status -n: - $ env RHG_STATUS=1 RHG_ON_UNSUPPORTED=abort hg status -n + $ env RHG_ON_UNSUPPORTED=abort hg status -n added removed deleted