changeset 48454:473af5cbc209

rhg: Add support for `rhg status --copies` Copy sources are collected during `status()` rather than after the fact like in Python, because `status()` takes a `&mut` exclusive reference to the dirstate map (in order to potentially mutate it for directory mtimes) and returns `Cow<'_, HgPath>` that borrow the dirstate map. Even though with `Cow` only some shared borrows remain, the still extend the same lifetime of the initial `&mut` so the dirstate map cannot be borrowed again to access copy sources after the fact: https://doc.rust-lang.org/nomicon/lifetime-mismatch.html#limits-of-lifetimes Additionally, collecting copy sources during the dirstate tree traversal that `status()` already does avoids the cost of another traversal or other lookups (though I haven’t benchmarked that cost). Differential Revision: https://phab.mercurial-scm.org/D11899
author Simon Sapin <simon.sapin@octobus.net>
date Fri, 10 Dec 2021 16:18:58 +0100
parents 9b0e1f64656f
children 04d756b7c0b5
files rust/hg-core/src/dirstate/status.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/status.rs rust/hg-cpython/src/dirstate/status.rs rust/rhg/src/commands/status.rs tests/test-rename-dir-merge.t tests/test-status.t
diffstat 7 files changed, 171 insertions(+), 107 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs	Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-core/src/dirstate/status.rs	Fri Dec 10 16:18:58 2021 +0100
@@ -66,41 +66,43 @@
     pub list_clean: bool,
     pub list_unknown: bool,
     pub list_ignored: bool,
+    /// Whether to populate `StatusPath::copy_source`
+    pub list_copies: bool,
     /// Whether to collect traversed dirs for applying a callback later.
     /// Used by `hg purge` for example.
     pub collect_traversed_dirs: bool,
 }
 
-#[derive(Debug, Default)]
+#[derive(Default)]
 pub struct DirstateStatus<'a> {
     /// The current time at the start of the `status()` algorithm, as measured
     /// and possibly truncated by the filesystem.
     pub filesystem_time_at_status_start: Option<std::time::SystemTime>,
 
     /// Tracked files whose contents have changed since the parent revision
-    pub modified: Vec<HgPathCow<'a>>,
+    pub modified: Vec<StatusPath<'a>>,
 
     /// Newly-tracked files that were not present in the parent
-    pub added: Vec<HgPathCow<'a>>,
+    pub added: Vec<StatusPath<'a>>,
 
     /// Previously-tracked files that have been (re)moved with an hg command
-    pub removed: Vec<HgPathCow<'a>>,
+    pub removed: Vec<StatusPath<'a>>,
 
     /// (Still) tracked files that are missing, (re)moved with an non-hg
     /// command
-    pub deleted: Vec<HgPathCow<'a>>,
+    pub deleted: Vec<StatusPath<'a>>,
 
     /// Tracked files that are up to date with the parent.
     /// Only pupulated if `StatusOptions::list_clean` is true.
-    pub clean: Vec<HgPathCow<'a>>,
+    pub clean: Vec<StatusPath<'a>>,
 
     /// Files in the working directory that are ignored with `.hgignore`.
     /// Only pupulated if `StatusOptions::list_ignored` is true.
-    pub ignored: Vec<HgPathCow<'a>>,
+    pub ignored: Vec<StatusPath<'a>>,
 
     /// Files in the working directory that are neither tracked nor ignored.
     /// Only pupulated if `StatusOptions::list_unknown` is true.
-    pub unknown: Vec<HgPathCow<'a>>,
+    pub unknown: Vec<StatusPath<'a>>,
 
     /// Was explicitly matched but cannot be found/accessed
     pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
@@ -108,7 +110,7 @@
     /// Either clean or modified, but we can’t tell from filesystem metadata
     /// alone. The file contents need to be read and compared with that in
     /// the parent.
-    pub unsure: Vec<HgPathCow<'a>>,
+    pub unsure: Vec<StatusPath<'a>>,
 
     /// Only filled if `collect_traversed_dirs` is `true`
     pub traversed: Vec<HgPathCow<'a>>,
@@ -118,6 +120,12 @@
     pub dirty: bool,
 }
 
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct StatusPath<'a> {
+    pub path: HgPathCow<'a>,
+    pub copy_source: Option<HgPathCow<'a>>,
+}
+
 #[derive(Debug, derive_more::From)]
 pub enum StatusError {
     /// Generic IO error
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Dec 10 16:18:58 2021 +0100
@@ -309,6 +309,25 @@
             NodeRef::OnDisk(node) => node.copy_source(on_disk),
         }
     }
+    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
+    /// HgPath>` detached from `'tree`
+    pub(super) fn copy_source_borrowed(
+        &self,
+        on_disk: &'on_disk [u8],
+    ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
+    {
+        Ok(match self {
+            NodeRef::InMemory(_path, node) => {
+                node.copy_source.as_ref().map(|source| match source {
+                    Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
+                    Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
+                })
+            }
+            NodeRef::OnDisk(node) => node
+                .copy_source(on_disk)?
+                .map(|source| BorrowedPath::OnDisk(source)),
+        })
+    }
 
     pub(super) fn entry(
         &self,
--- a/rust/hg-core/src/dirstate_tree/status.rs	Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Fri Dec 10 16:18:58 2021 +0100
@@ -1,5 +1,6 @@
 use crate::dirstate::entry::TruncatedTimestamp;
 use crate::dirstate::status::IgnoreFnType;
+use crate::dirstate::status::StatusPath;
 use crate::dirstate_tree::dirstate_map::BorrowedPath;
 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
 use crate::dirstate_tree::dirstate_map::DirstateMap;
@@ -15,6 +16,7 @@
 use crate::DirstateStatus;
 use crate::EntryState;
 use crate::HgPathBuf;
+use crate::HgPathCow;
 use crate::PatternFileWarning;
 use crate::StatusError;
 use crate::StatusOptions;
@@ -146,7 +148,65 @@
     filesystem_time_at_status_start: Option<SystemTime>,
 }
 
+enum Outcome {
+    Modified,
+    Added,
+    Removed,
+    Deleted,
+    Clean,
+    Ignored,
+    Unknown,
+    Unsure,
+}
+
 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
+    fn push_outcome(
+        &self,
+        which: Outcome,
+        dirstate_node: &NodeRef<'tree, 'on_disk>,
+    ) -> Result<(), DirstateV2ParseError> {
+        let path = dirstate_node
+            .full_path_borrowed(self.dmap.on_disk)?
+            .detach_from_tree();
+        let copy_source = if self.options.list_copies {
+            dirstate_node
+                .copy_source_borrowed(self.dmap.on_disk)?
+                .map(|source| source.detach_from_tree())
+        } else {
+            None
+        };
+        self.push_outcome_common(which, path, copy_source);
+        Ok(())
+    }
+
+    fn push_outcome_without_copy_source(
+        &self,
+        which: Outcome,
+        path: &BorrowedPath<'_, 'on_disk>,
+    ) {
+        self.push_outcome_common(which, path.detach_from_tree(), None)
+    }
+
+    fn push_outcome_common(
+        &self,
+        which: Outcome,
+        path: HgPathCow<'on_disk>,
+        copy_source: Option<HgPathCow<'on_disk>>,
+    ) {
+        let mut outcome = self.outcome.lock().unwrap();
+        let vec = match which {
+            Outcome::Modified => &mut outcome.modified,
+            Outcome::Added => &mut outcome.added,
+            Outcome::Removed => &mut outcome.removed,
+            Outcome::Deleted => &mut outcome.deleted,
+            Outcome::Clean => &mut outcome.clean,
+            Outcome::Ignored => &mut outcome.ignored,
+            Outcome::Unknown => &mut outcome.unknown,
+            Outcome::Unsure => &mut outcome.unsure,
+        };
+        vec.push(StatusPath { path, copy_source });
+    }
+
     fn read_dir(
         &self,
         hg_path: &HgPath,
@@ -347,10 +407,7 @@
             // If we previously had a file here, it was removed (with
             // `hg rm` or similar) or deleted before it could be
             // replaced by a directory or something else.
-            self.mark_removed_or_deleted_if_file(
-                &hg_path,
-                dirstate_node.state()?,
-            );
+            self.mark_removed_or_deleted_if_file(&dirstate_node)?;
         }
         if file_type.is_dir() {
             if self.options.collect_traversed_dirs {
@@ -381,24 +438,13 @@
             if file_or_symlink && self.matcher.matches(hg_path) {
                 if let Some(state) = dirstate_node.state()? {
                     match state {
-                        EntryState::Added => self
-                            .outcome
-                            .lock()
-                            .unwrap()
-                            .added
-                            .push(hg_path.detach_from_tree()),
+                        EntryState::Added => {
+                            self.push_outcome(Outcome::Added, &dirstate_node)?
+                        }
                         EntryState::Removed => self
-                            .outcome
-                            .lock()
-                            .unwrap()
-                            .removed
-                            .push(hg_path.detach_from_tree()),
+                            .push_outcome(Outcome::Removed, &dirstate_node)?,
                         EntryState::Merged => self
-                            .outcome
-                            .lock()
-                            .unwrap()
-                            .modified
-                            .push(hg_path.detach_from_tree()),
+                            .push_outcome(Outcome::Modified, &dirstate_node)?,
                         EntryState::Normal => self
                             .handle_normal_file(&dirstate_node, fs_metadata)?,
                     }
@@ -510,7 +556,6 @@
         let entry = dirstate_node
             .entry()?
             .expect("handle_normal_file called with entry-less node");
-        let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
         let mode_changed =
             || self.options.check_exec && entry.mode_changed(fs_metadata);
         let size = entry.size();
@@ -518,20 +563,12 @@
         if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
             // issue6456: Size returned may be longer due to encryption
             // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
-            self.outcome
-                .lock()
-                .unwrap()
-                .unsure
-                .push(hg_path.detach_from_tree())
+            self.push_outcome(Outcome::Unsure, dirstate_node)?
         } else if dirstate_node.has_copy_source()
             || entry.is_from_other_parent()
             || (size >= 0 && (size_changed || mode_changed()))
         {
-            self.outcome
-                .lock()
-                .unwrap()
-                .modified
-                .push(hg_path.detach_from_tree())
+            self.push_outcome(Outcome::Modified, dirstate_node)?
         } else {
             let mtime_looks_clean;
             if let Some(dirstate_mtime) = entry.truncated_mtime() {
@@ -548,17 +585,9 @@
                 mtime_looks_clean = false
             };
             if !mtime_looks_clean {
-                self.outcome
-                    .lock()
-                    .unwrap()
-                    .unsure
-                    .push(hg_path.detach_from_tree())
+                self.push_outcome(Outcome::Unsure, dirstate_node)?
             } else if self.options.list_clean {
-                self.outcome
-                    .lock()
-                    .unwrap()
-                    .clean
-                    .push(hg_path.detach_from_tree())
+                self.push_outcome(Outcome::Clean, dirstate_node)?
             }
         }
         Ok(())
@@ -570,10 +599,7 @@
         dirstate_node: NodeRef<'tree, 'on_disk>,
     ) -> Result<(), DirstateV2ParseError> {
         self.check_for_outdated_directory_cache(&dirstate_node)?;
-        self.mark_removed_or_deleted_if_file(
-            &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
-            dirstate_node.state()?,
-        );
+        self.mark_removed_or_deleted_if_file(&dirstate_node)?;
         dirstate_node
             .children(self.dmap.on_disk)?
             .par_iter()
@@ -587,26 +613,19 @@
     /// Does nothing on a "directory" node
     fn mark_removed_or_deleted_if_file(
         &self,
-        hg_path: &BorrowedPath<'tree, 'on_disk>,
-        dirstate_node_state: Option<EntryState>,
-    ) {
-        if let Some(state) = dirstate_node_state {
-            if self.matcher.matches(hg_path) {
+        dirstate_node: &NodeRef<'tree, 'on_disk>,
+    ) -> Result<(), DirstateV2ParseError> {
+        if let Some(state) = dirstate_node.state()? {
+            let path = dirstate_node.full_path(self.dmap.on_disk)?;
+            if self.matcher.matches(path) {
                 if let EntryState::Removed = state {
-                    self.outcome
-                        .lock()
-                        .unwrap()
-                        .removed
-                        .push(hg_path.detach_from_tree())
+                    self.push_outcome(Outcome::Removed, dirstate_node)?
                 } else {
-                    self.outcome
-                        .lock()
-                        .unwrap()
-                        .deleted
-                        .push(hg_path.detach_from_tree())
+                    self.push_outcome(Outcome::Deleted, &dirstate_node)?
                 }
             }
         }
+        Ok(())
     }
 
     /// Something in the filesystem has no corresponding dirstate node
@@ -684,19 +703,17 @@
         let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
         if is_ignored {
             if self.options.list_ignored {
-                self.outcome
-                    .lock()
-                    .unwrap()
-                    .ignored
-                    .push(hg_path.detach_from_tree())
+                self.push_outcome_without_copy_source(
+                    Outcome::Ignored,
+                    hg_path,
+                )
             }
         } else {
             if self.options.list_unknown {
-                self.outcome
-                    .lock()
-                    .unwrap()
-                    .unknown
-                    .push(hg_path.detach_from_tree())
+                self.push_outcome_without_copy_source(
+                    Outcome::Unknown,
+                    hg_path,
+                )
             }
         }
         is_ignored
--- a/rust/hg-cpython/src/dirstate/status.rs	Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/hg-cpython/src/dirstate/status.rs	Fri Dec 10 16:18:58 2021 +0100
@@ -15,6 +15,7 @@
     exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
     PyResult, PyTuple, Python, PythonObject, ToPyObject,
 };
+use hg::dirstate::status::StatusPath;
 use hg::{
     matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
     parse_pattern_syntax,
@@ -27,15 +28,19 @@
 };
 use std::borrow::Borrow;
 
+fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList {
+    collect_pybytes_list(py, paths.iter().map(|item| &*item.path))
+}
+
 /// This will be useless once trait impls for collection are added to `PyBytes`
 /// upstream.
 fn collect_pybytes_list(
     py: Python,
-    collection: &[impl AsRef<HgPath>],
+    iter: impl Iterator<Item = impl AsRef<HgPath>>,
 ) -> PyList {
     let list = PyList::new(py, &[]);
 
-    for path in collection.iter() {
+    for path in iter {
         list.append(
             py,
             PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
@@ -121,6 +126,8 @@
         })
         .collect();
     let ignore_files = ignore_files?;
+    // The caller may call `copymap.items()` separately
+    let list_copies = false;
 
     match matcher.get_type(py).name(py).borrow() {
         "alwaysmatcher" => {
@@ -135,6 +142,7 @@
                         list_clean,
                         list_ignored,
                         list_unknown,
+                        list_copies,
                         collect_traversed_dirs,
                     },
                 )
@@ -171,6 +179,7 @@
                         list_clean,
                         list_ignored,
                         list_unknown,
+                        list_copies,
                         collect_traversed_dirs,
                     },
                 )
@@ -222,6 +231,7 @@
                         list_clean,
                         list_ignored,
                         list_unknown,
+                        list_copies,
                         collect_traversed_dirs,
                     },
                 )
@@ -241,16 +251,16 @@
     status_res: DirstateStatus,
     warnings: Vec<PatternFileWarning>,
 ) -> PyResult<PyTuple> {
-    let modified = collect_pybytes_list(py, status_res.modified.as_ref());
-    let added = collect_pybytes_list(py, status_res.added.as_ref());
-    let removed = collect_pybytes_list(py, status_res.removed.as_ref());
-    let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
-    let clean = collect_pybytes_list(py, status_res.clean.as_ref());
-    let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
-    let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
-    let unsure = collect_pybytes_list(py, status_res.unsure.as_ref());
-    let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
-    let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
+    let modified = collect_status_path_list(py, &status_res.modified);
+    let added = collect_status_path_list(py, &status_res.added);
+    let removed = collect_status_path_list(py, &status_res.removed);
+    let deleted = collect_status_path_list(py, &status_res.deleted);
+    let clean = collect_status_path_list(py, &status_res.clean);
+    let ignored = collect_status_path_list(py, &status_res.ignored);
+    let unknown = collect_status_path_list(py, &status_res.unknown);
+    let unsure = collect_status_path_list(py, &status_res.unsure);
+    let bad = collect_bad_matches(py, &status_res.bad)?;
+    let traversed = collect_pybytes_list(py, status_res.traversed.iter());
     let dirty = status_res.dirty.to_py_object(py);
     let py_warnings = PyList::new(py, &[]);
     for warning in warnings.iter() {
--- a/rust/rhg/src/commands/status.rs	Fri Dec 10 16:57:39 2021 +0100
+++ b/rust/rhg/src/commands/status.rs	Fri Dec 10 16:18:58 2021 +0100
@@ -13,6 +13,7 @@
 use hg;
 use hg::config::Config;
 use hg::dirstate::has_exec_bit;
+use hg::dirstate::status::StatusPath;
 use hg::dirstate::TruncatedTimestamp;
 use hg::dirstate::RANGE_MASK_31BIT;
 use hg::errors::{HgError, IoResultExt};
@@ -23,7 +24,7 @@
 use hg::utils::files::get_bytes_from_os_string;
 use hg::utils::files::get_path_from_bytes;
 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
-use hg::{HgPathCow, StatusOptions};
+use hg::StatusOptions;
 use log::{info, warn};
 use std::io;
 use std::path::PathBuf;
@@ -89,6 +90,12 @@
                 .long("--ignored"),
         )
         .arg(
+            Arg::with_name("copies")
+                .help("show source of copied files (DEFAULT: ui.statuscopies)")
+                .short("-C")
+                .long("--copies"),
+        )
+        .arg(
             Arg::with_name("no-status")
                 .help("hide status prefix")
                 .short("-n")
@@ -174,7 +181,8 @@
     let ui = invocation.ui;
     let config = invocation.config;
     let args = invocation.subcommand_args;
-    let display_states = if args.is_present("all") {
+    let all = args.is_present("all");
+    let display_states = if all {
         // TODO when implementing `--quiet`: it excludes clean files
         // from `--all`
         ALL_DISPLAY_STATES
@@ -195,6 +203,9 @@
         }
     };
     let no_status = args.is_present("no-status");
+    let list_copies = all
+        || args.is_present("copies")
+        || config.get_bool(b"ui", b"statuscopies")?;
 
     let repo = invocation.repo?;
 
@@ -213,6 +224,7 @@
         list_clean: display_states.clean,
         list_unknown: display_states.unknown,
         list_ignored: display_states.ignored,
+        list_copies,
         collect_traversed_dirs: false,
     };
     let (mut ds_status, pattern_warnings) = dmap.status(
@@ -231,7 +243,7 @@
     if !ds_status.unsure.is_empty() {
         info!(
             "Files to be rechecked by retrieval from filelog: {:?}",
-            &ds_status.unsure
+            ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
         );
     }
     let mut fixup = Vec::new();
@@ -243,7 +255,7 @@
             CommandError::from((e, &*format!("{:x}", p1.short())))
         })?;
         for to_check in ds_status.unsure {
-            if unsure_is_modified(repo, &manifest, &to_check)? {
+            if unsure_is_modified(repo, &manifest, &to_check.path)? {
                 if display_states.modified {
                     ds_status.modified.push(to_check);
                 }
@@ -251,7 +263,7 @@
                 if display_states.clean {
                     ds_status.clean.push(to_check.clone());
                 }
-                fixup.push(to_check.into_owned())
+                fixup.push(to_check.path.into_owned())
             }
         }
     }
@@ -392,10 +404,10 @@
     fn display(
         &self,
         status_prefix: &[u8],
-        mut paths: Vec<HgPathCow>,
+        mut paths: Vec<StatusPath<'_>>,
     ) -> Result<(), CommandError> {
         paths.sort_unstable();
-        for path in paths {
+        for StatusPath { path, copy_source } in paths {
             let relative;
             let path = if let Some(relativize) = &self.relativize {
                 relative = relativize.relativize(&path);
@@ -414,6 +426,12 @@
                     path
                 ))?
             }
+            if let Some(source) = copy_source {
+                self.ui.write_stdout(&format_bytes!(
+                    b"  {}\n",
+                    source.as_bytes()
+                ))?
+            }
         }
         Ok(())
     }
--- a/tests/test-rename-dir-merge.t	Fri Dec 10 16:57:39 2021 +0100
+++ b/tests/test-rename-dir-merge.t	Fri Dec 10 16:18:58 2021 +0100
@@ -1,7 +1,3 @@
-TODO: fix rhg bugs that make this test fail when status is enabled
-  $ unset RHG_STATUS
-
-
   $ hg init t
   $ cd t
 
--- a/tests/test-status.t	Fri Dec 10 16:57:39 2021 +0100
+++ b/tests/test-status.t	Fri Dec 10 16:18:58 2021 +0100
@@ -9,10 +9,6 @@
   > EOF
 #endif
 
-TODO: fix rhg bugs that make this test fail when status is enabled
-  $ unset RHG_STATUS
-
-
   $ hg init repo1
   $ cd repo1
   $ mkdir a b a/1 b/1 b/2
@@ -223,7 +219,7 @@
   ? unknown
 
 hg status -n:
-  $ env RHG_STATUS=1 RHG_ON_UNSUPPORTED=abort hg status -n
+  $ env RHG_ON_UNSUPPORTED=abort hg status -n
   added
   removed
   deleted