view rust/rhg/src/commands/status.rs @ 51212:13f58ce70299

rust-revlog: teach the revlog opening code to read the repo options This will become necessary as we start writing revlog data from Rust.
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 18 Sep 2023 17:11:11 +0200
parents ac3859a8b796
children eab5b061cd48
line wrap: on
line source

// status.rs
//
// Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

use crate::error::CommandError;
use crate::ui::{
    format_pattern_file_warning, print_narrow_sparse_warnings, relative_paths,
    RelativePaths, Ui,
};
use crate::utils::path_utils::RelativizePaths;
use clap::Arg;
use format_bytes::format_bytes;
use hg::config::Config;
use hg::dirstate::has_exec_bit;
use hg::dirstate::status::StatusPath;
use hg::dirstate::TruncatedTimestamp;
use hg::errors::{HgError, IoResultExt};
use hg::filepatterns::parse_pattern_args;
use hg::lock::LockError;
use hg::manifest::Manifest;
use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
use hg::repo::Repo;
use hg::utils::debug::debug_wait_for_file;
use hg::utils::files::{
    get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
};
use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
use hg::PatternFileWarning;
use hg::Revision;
use hg::StatusError;
use hg::StatusOptions;
use hg::{self, narrow, sparse};
use hg::{DirstateStatus, RevlogOpenOptions};
use log::info;
use rayon::prelude::*;
use std::borrow::Cow;
use std::io;
use std::mem::take;
use std::path::PathBuf;

pub const HELP_TEXT: &str = "
Show changed files in the working directory

This is a pure Rust version of `hg status`.

Some options might be missing, check the list below.
";

pub fn args() -> clap::Command {
    clap::command!("status")
        .alias("st")
        .about(HELP_TEXT)
        .arg(
            Arg::new("file")
                .value_parser(clap::value_parser!(std::ffi::OsString))
                .help("show only these files")
                .action(clap::ArgAction::Append),
        )
        .arg(
            Arg::new("all")
                .help("show status of all files")
                .short('A')
                .action(clap::ArgAction::SetTrue)
                .long("all"),
        )
        .arg(
            Arg::new("modified")
                .help("show only modified files")
                .short('m')
                .action(clap::ArgAction::SetTrue)
                .long("modified"),
        )
        .arg(
            Arg::new("added")
                .help("show only added files")
                .short('a')
                .action(clap::ArgAction::SetTrue)
                .long("added"),
        )
        .arg(
            Arg::new("removed")
                .help("show only removed files")
                .short('r')
                .action(clap::ArgAction::SetTrue)
                .long("removed"),
        )
        .arg(
            Arg::new("clean")
                .help("show only clean files")
                .short('c')
                .action(clap::ArgAction::SetTrue)
                .long("clean"),
        )
        .arg(
            Arg::new("deleted")
                .help("show only deleted files")
                .short('d')
                .action(clap::ArgAction::SetTrue)
                .long("deleted"),
        )
        .arg(
            Arg::new("unknown")
                .help("show only unknown (not tracked) files")
                .short('u')
                .action(clap::ArgAction::SetTrue)
                .long("unknown"),
        )
        .arg(
            Arg::new("ignored")
                .help("show only ignored files")
                .short('i')
                .action(clap::ArgAction::SetTrue)
                .long("ignored"),
        )
        .arg(
            Arg::new("copies")
                .help("show source of copied files (DEFAULT: ui.statuscopies)")
                .short('C')
                .action(clap::ArgAction::SetTrue)
                .long("copies"),
        )
        .arg(
            Arg::new("print0")
                .help("end filenames with NUL, for use with xargs")
                .short('0')
                .action(clap::ArgAction::SetTrue)
                .long("print0"),
        )
        .arg(
            Arg::new("no-status")
                .help("hide status prefix")
                .short('n')
                .action(clap::ArgAction::SetTrue)
                .long("no-status"),
        )
        .arg(
            Arg::new("verbose")
                .help("enable additional output")
                .short('v')
                .action(clap::ArgAction::SetTrue)
                .long("verbose"),
        )
        .arg(
            Arg::new("rev")
                .help("show difference from/to revision")
                .long("rev")
                .num_args(1)
                .action(clap::ArgAction::Append)
                .value_name("REV"),
        )
}

fn parse_revpair(
    repo: &Repo,
    revs: Option<Vec<String>>,
) -> Result<Option<(Revision, Revision)>, CommandError> {
    let revs = match revs {
        None => return Ok(None),
        Some(revs) => revs,
    };
    if revs.is_empty() {
        return Ok(None);
    }
    if revs.len() != 2 {
        return Err(CommandError::unsupported("expected 0 or 2 --rev flags"));
    }

    let rev1 = &revs[0];
    let rev2 = &revs[1];
    let rev1 = hg::revset::resolve_single(rev1, repo)
        .map_err(|e| (e, rev1.as_str()))?;
    let rev2 = hg::revset::resolve_single(rev2, repo)
        .map_err(|e| (e, rev2.as_str()))?;
    Ok(Some((rev1, rev2)))
}

/// Pure data type allowing the caller to specify file states to display
#[derive(Copy, Clone, Debug)]
pub struct DisplayStates {
    pub modified: bool,
    pub added: bool,
    pub removed: bool,
    pub clean: bool,
    pub deleted: bool,
    pub unknown: bool,
    pub ignored: bool,
}

pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
    modified: true,
    added: true,
    removed: true,
    clean: false,
    deleted: true,
    unknown: true,
    ignored: false,
};

pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
    modified: true,
    added: true,
    removed: true,
    clean: true,
    deleted: true,
    unknown: true,
    ignored: true,
};

impl DisplayStates {
    pub fn is_empty(&self) -> bool {
        !(self.modified
            || self.added
            || self.removed
            || self.clean
            || self.deleted
            || self.unknown
            || self.ignored)
    }
}

fn has_unfinished_merge(repo: &Repo) -> Result<bool, CommandError> {
    Ok(repo.dirstate_parents()?.is_merge())
}

fn has_unfinished_state(repo: &Repo) -> Result<bool, CommandError> {
    // These are all the known values for the [fname] argument of
    // [addunfinished] function in [state.py]
    let known_state_files: &[&str] = &[
        "bisect.state",
        "graftstate",
        "histedit-state",
        "rebasestate",
        "shelvedstate",
        "transplant/journal",
        "updatestate",
    ];
    if has_unfinished_merge(repo)? {
        return Ok(true);
    };
    for f in known_state_files {
        if repo.hg_vfs().join(f).exists() {
            return Ok(true);
        }
    }
    Ok(false)
}

pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
    // TODO: lift these limitations
    if invocation
        .config
        .get(b"commands", b"status.terse")
        .is_some()
    {
        return Err(CommandError::unsupported(
            "status.terse is not yet supported with rhg status",
        ));
    }

    let ui = invocation.ui;
    let config = invocation.config;
    let args = invocation.subcommand_args;

    let revs = args.get_many::<String>("rev");
    let print0 = args.get_flag("print0");
    let verbose = args.get_flag("verbose")
        || config.get_bool(b"ui", b"verbose")?
        || config.get_bool(b"commands", b"status.verbose")?;
    let verbose = verbose && !print0;

    let all = args.get_flag("all");
    let display_states = if all {
        // TODO when implementing `--quiet`: it excludes clean files
        // from `--all`
        ALL_DISPLAY_STATES
    } else {
        let requested = DisplayStates {
            modified: args.get_flag("modified"),
            added: args.get_flag("added"),
            removed: args.get_flag("removed"),
            clean: args.get_flag("clean"),
            deleted: args.get_flag("deleted"),
            unknown: args.get_flag("unknown"),
            ignored: args.get_flag("ignored"),
        };
        if requested.is_empty() {
            DEFAULT_DISPLAY_STATES
        } else {
            requested
        }
    };
    let no_status = args.get_flag("no-status");
    let list_copies = all
        || args.get_flag("copies")
        || config.get_bool(b"ui", b"statuscopies")?;

    let repo = invocation.repo?;
    let revpair = parse_revpair(repo, revs.map(|i| i.cloned().collect()))?;

    if verbose && has_unfinished_state(repo)? {
        return Err(CommandError::unsupported(
            "verbose status output is not supported by rhg (and is needed because we're in an unfinished operation)",
        ));
    }

    let mut dmap = repo.dirstate_map_mut()?;

    let check_exec = hg::checkexec::check_exec(repo.working_directory_path());

    let options = StatusOptions {
        check_exec,
        list_clean: display_states.clean,
        list_unknown: display_states.unknown,
        list_ignored: display_states.ignored,
        list_copies,
        collect_traversed_dirs: false,
    };

    type StatusResult<'a> =
        Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>;

    let relative_status = config
        .get_option(b"commands", b"status.relative")?
        .expect("commands.status.relative should have a default value");

    let relativize_paths = relative_status || {
        // See in Python code with `getuipathfn` usage in `commands.py`.
        let legacy_relative_behavior = args.contains_id("file");
        match relative_paths(invocation.config)? {
            RelativePaths::Legacy => legacy_relative_behavior,
            RelativePaths::Bool(v) => v,
        }
    };

    let mut output = DisplayStatusPaths {
        ui,
        no_status,
        relativize: if relativize_paths {
            Some(RelativizePaths::new(repo)?)
        } else {
            None
        },
        print0,
    };

    let after_status = |res: StatusResult| -> Result<_, CommandError> {
        let (mut ds_status, pattern_warnings) = res?;
        for warning in pattern_warnings {
            ui.write_stderr(&format_pattern_file_warning(&warning, repo))?;
        }

        for (path, error) in take(&mut ds_status.bad) {
            let error = match error {
                hg::BadMatch::OsError(code) => {
                    std::io::Error::from_raw_os_error(code).to_string()
                }
                hg::BadMatch::BadType(ty) => {
                    format!("unsupported file type (type is {})", ty)
                }
            };
            ui.write_stderr(&format_bytes!(
                b"{}: {}\n",
                path.as_bytes(),
                error.as_bytes()
            ))?
        }
        if !ds_status.unsure.is_empty() {
            info!(
                "Files to be rechecked by retrieval from filelog: {:?}",
                ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
            );
        }
        let mut fixup = Vec::new();
        if !ds_status.unsure.is_empty()
            && (display_states.modified || display_states.clean)
        {
            let p1 = repo.dirstate_parents()?.p1;
            let manifest = repo.manifest_for_node(p1).map_err(|e| {
                CommandError::from((e, &*format!("{:x}", p1.short())))
            })?;
            let working_directory_vfs = repo.working_directory_vfs();
            let store_vfs = repo.store_vfs();
            let revlog_open_options = repo.default_revlog_options(false)?;
            let res: Vec<_> = take(&mut ds_status.unsure)
                .into_par_iter()
                .map(|to_check| {
                    // The compiler seems to get a bit confused with complex
                    // inference when using a parallel iterator + map
                    // + map_err + collect, so let's just inline some of the
                    // logic.
                    match unsure_is_modified(
                        working_directory_vfs,
                        store_vfs,
                        check_exec,
                        &manifest,
                        &to_check.path,
                        revlog_open_options,
                    ) {
                        Err(HgError::IoError { .. }) => {
                            // IO errors most likely stem from the file being
                            // deleted even though we know it's in the
                            // dirstate.
                            Ok((to_check, UnsureOutcome::Deleted))
                        }
                        Ok(outcome) => Ok((to_check, outcome)),
                        Err(e) => Err(e),
                    }
                })
                .collect::<Result<_, _>>()?;
            for (status_path, outcome) in res.into_iter() {
                match outcome {
                    UnsureOutcome::Clean => {
                        if display_states.clean {
                            ds_status.clean.push(status_path.clone());
                        }
                        fixup.push(status_path.path.into_owned())
                    }
                    UnsureOutcome::Modified => {
                        if display_states.modified {
                            ds_status.modified.push(status_path);
                        }
                    }
                    UnsureOutcome::Deleted => {
                        if display_states.deleted {
                            ds_status.deleted.push(status_path);
                        }
                    }
                }
            }
        }

        let dirstate_write_needed = ds_status.dirty;
        let filesystem_time_at_status_start =
            ds_status.filesystem_time_at_status_start;

        output.output(display_states, ds_status)?;

        Ok((
            fixup,
            dirstate_write_needed,
            filesystem_time_at_status_start,
        ))
    };
    let (narrow_matcher, narrow_warnings) = narrow::matcher(repo)?;

    match revpair {
        Some((rev1, rev2)) => {
            let mut ds_status = DirstateStatus::default();
            if list_copies {
                return Err(CommandError::unsupported(
                    "status --rev --rev with copy information is not implemented yet",
                ));
            }

            let stat = hg::operations::status_rev_rev_no_copies(
                repo,
                rev1,
                rev2,
                narrow_matcher,
            )?;
            for entry in stat.iter() {
                let (path, status) = entry?;
                let path = StatusPath {
                    path: Cow::Borrowed(path),
                    copy_source: None,
                };
                match status {
                    hg::operations::DiffStatus::Removed => {
                        if display_states.removed {
                            ds_status.removed.push(path)
                        }
                    }
                    hg::operations::DiffStatus::Added => {
                        if display_states.added {
                            ds_status.added.push(path)
                        }
                    }
                    hg::operations::DiffStatus::Modified => {
                        if display_states.modified {
                            ds_status.modified.push(path)
                        }
                    }
                    hg::operations::DiffStatus::Matching => {
                        if display_states.clean {
                            ds_status.clean.push(path)
                        }
                    }
                }
            }
            output.output(display_states, ds_status)?;
            return Ok(());
        }
        None => (),
    }

    let (sparse_matcher, sparse_warnings) = sparse::matcher(repo)?;
    let matcher = match (repo.has_narrow(), repo.has_sparse()) {
        (true, true) => {
            Box::new(IntersectionMatcher::new(narrow_matcher, sparse_matcher))
        }
        (true, false) => narrow_matcher,
        (false, true) => sparse_matcher,
        (false, false) => Box::new(AlwaysMatcher),
    };
    let matcher = match args.get_many::<std::ffi::OsString>("file") {
        None => matcher,
        Some(files) => {
            let patterns: Vec<Vec<u8>> = files
                .filter(|s| !s.is_empty())
                .map(get_bytes_from_os_str)
                .collect();
            for file in &patterns {
                if file.starts_with(b"set:") {
                    return Err(CommandError::unsupported("fileset"));
                }
            }
            let cwd = hg::utils::current_dir()?;
            let root = repo.working_directory_path();
            let ignore_patterns = parse_pattern_args(patterns, &cwd, root)?;
            let files_matcher =
                hg::matchers::PatternMatcher::new(ignore_patterns)?;
            Box::new(IntersectionMatcher::new(
                Box::new(files_matcher),
                matcher,
            ))
        }
    };

    print_narrow_sparse_warnings(
        &narrow_warnings,
        &sparse_warnings,
        ui,
        repo,
    )?;
    let (fixup, mut dirstate_write_needed, filesystem_time_at_status_start) =
        dmap.with_status(
            matcher.as_ref(),
            repo.working_directory_path().to_owned(),
            ignore_files(repo, config),
            options,
            after_status,
        )?;

    // Development config option to test write races
    if let Err(e) =
        debug_wait_for_file(config, "status.pre-dirstate-write-file")
    {
        ui.write_stderr(e.as_bytes()).ok();
    }

    if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
        && !dirstate_write_needed
    {
        // Nothing to update
        return Ok(());
    }

    // Update the dirstate on disk if we can
    let with_lock_result =
        repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
            if let Some(mtime_boundary) = filesystem_time_at_status_start {
                for hg_path in fixup {
                    use std::os::unix::fs::MetadataExt;
                    let fs_path = hg_path_to_path_buf(&hg_path)
                        .expect("HgPath conversion");
                    // Specifically do not reuse `fs_metadata` from
                    // `unsure_is_clean` which was needed before reading
                    // contents. Here we access metadata again after reading
                    // content, in case it changed in the meantime.
                    let metadata_res = repo
                        .working_directory_vfs()
                        .symlink_metadata(&fs_path);
                    let fs_metadata = match metadata_res {
                        Ok(meta) => meta,
                        Err(err) => match err {
                            HgError::IoError { .. } => {
                                // The file has probably been deleted. In any
                                // case, it was in the dirstate before, so
                                // let's ignore the error.
                                continue;
                            }
                            _ => return Err(err.into()),
                        },
                    };
                    if let Some(mtime) =
                        TruncatedTimestamp::for_reliable_mtime_of(
                            &fs_metadata,
                            &mtime_boundary,
                        )
                        .when_reading_file(&fs_path)?
                    {
                        let mode = fs_metadata.mode();
                        let size = fs_metadata.len();
                        dmap.set_clean(&hg_path, mode, size as u32, mtime)?;
                        dirstate_write_needed = true
                    }
                }
            }
            drop(dmap); // Avoid "already mutably borrowed" RefCell panics
            if dirstate_write_needed {
                repo.write_dirstate()?
            }
            Ok(())
        });
    match with_lock_result {
        Ok(closure_result) => closure_result?,
        Err(LockError::AlreadyHeld) => {
            // Not updating the dirstate is not ideal but not critical:
            // don’t keep our caller waiting until some other Mercurial
            // process releases the lock.
            log::info!("not writing dirstate from `status`: lock is held")
        }
        Err(LockError::Other(HgError::IoError { error, .. }))
            if error.kind() == io::ErrorKind::PermissionDenied =>
        {
            // `hg status` on a read-only repository is fine
        }
        Err(LockError::Other(error)) => {
            // Report other I/O errors
            Err(error)?
        }
    }
    Ok(())
}

fn ignore_files(repo: &Repo, config: &Config) -> Vec<PathBuf> {
    let mut ignore_files = Vec::new();
    let repo_ignore = repo.working_directory_vfs().join(".hgignore");
    if repo_ignore.exists() {
        ignore_files.push(repo_ignore)
    }
    for (key, value) in config.iter_section(b"ui") {
        if key == b"ignore" || key.starts_with(b"ignore.") {
            let path = get_path_from_bytes(value);
            // TODO: expand "~/" and environment variable here, like Python
            // does with `os.path.expanduser` and `os.path.expandvars`

            let joined = repo.working_directory_path().join(path);
            ignore_files.push(joined);
        }
    }
    ignore_files
}

struct DisplayStatusPaths<'a> {
    ui: &'a Ui,
    no_status: bool,
    relativize: Option<RelativizePaths>,
    print0: bool,
}

impl DisplayStatusPaths<'_> {
    // Probably more elegant to use a Deref or Borrow trait rather than
    // harcode HgPathBuf, but probably not really useful at this point
    fn display(
        &self,
        status_prefix: &[u8],
        label: &'static str,
        mut paths: Vec<StatusPath<'_>>,
    ) -> Result<(), CommandError> {
        paths.sort_unstable();
        // TODO: get the stdout lock once for the whole loop
        // instead of in each write
        for StatusPath { path, copy_source } in paths {
            let relative_path;
            let relative_source;
            let (path, copy_source) = if let Some(relativize) =
                &self.relativize
            {
                relative_path = relativize.relativize(&path);
                relative_source =
                    copy_source.as_ref().map(|s| relativize.relativize(s));
                (&*relative_path, relative_source.as_deref())
            } else {
                (path.as_bytes(), copy_source.as_ref().map(|s| s.as_bytes()))
            };
            // TODO: Add a way to use `write_bytes!` instead of `format_bytes!`
            // in order to stream to stdout instead of allocating an
            // itermediate `Vec<u8>`.
            if !self.no_status {
                self.ui.write_stdout_labelled(status_prefix, label)?
            }
            let linebreak = if self.print0 { b"\x00" } else { b"\n" };
            self.ui.write_stdout_labelled(
                &format_bytes!(b"{}{}", path, linebreak),
                label,
            )?;
            if let Some(source) = copy_source.filter(|_| !self.no_status) {
                let label = "status.copied";
                self.ui.write_stdout_labelled(
                    &format_bytes!(b"  {}{}", source, linebreak),
                    label,
                )?
            }
        }
        Ok(())
    }

    fn output(
        &mut self,
        display_states: DisplayStates,
        ds_status: DirstateStatus,
    ) -> Result<(), CommandError> {
        if display_states.modified {
            self.display(b"M ", "status.modified", ds_status.modified)?;
        }
        if display_states.added {
            self.display(b"A ", "status.added", ds_status.added)?;
        }
        if display_states.removed {
            self.display(b"R ", "status.removed", ds_status.removed)?;
        }
        if display_states.deleted {
            self.display(b"! ", "status.deleted", ds_status.deleted)?;
        }
        if display_states.unknown {
            self.display(b"? ", "status.unknown", ds_status.unknown)?;
        }
        if display_states.ignored {
            self.display(b"I ", "status.ignored", ds_status.ignored)?;
        }
        if display_states.clean {
            self.display(b"C ", "status.clean", ds_status.clean)?;
        }
        Ok(())
    }
}

/// Outcome of the additional check for an ambiguous tracked file
enum UnsureOutcome {
    /// The file is actually clean
    Clean,
    /// The file has been modified
    Modified,
    /// The file was deleted on disk (or became another type of fs entry)
    Deleted,
}

/// Check if a file is modified by comparing actual repo store and file system.
///
/// This meant to be used for those that the dirstate cannot resolve, due
/// to time resolution limits.
fn unsure_is_modified(
    working_directory_vfs: hg::vfs::Vfs,
    store_vfs: hg::vfs::Vfs,
    check_exec: bool,
    manifest: &Manifest,
    hg_path: &HgPath,
    revlog_open_options: RevlogOpenOptions,
) -> Result<UnsureOutcome, HgError> {
    let vfs = working_directory_vfs;
    let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
    let fs_metadata = vfs.symlink_metadata(&fs_path)?;
    let is_symlink = fs_metadata.file_type().is_symlink();

    let entry = manifest
        .find_by_path(hg_path)?
        .expect("ambgious file not in p1");

    // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
    // dirstate
    let fs_flags = if is_symlink {
        Some(b'l')
    } else if check_exec && has_exec_bit(&fs_metadata) {
        Some(b'x')
    } else {
        None
    };

    let entry_flags = if check_exec {
        entry.flags
    } else if entry.flags == Some(b'x') {
        None
    } else {
        entry.flags
    };

    if entry_flags != fs_flags {
        return Ok(UnsureOutcome::Modified);
    }
    let filelog = hg::filelog::Filelog::open_vfs(
        &store_vfs,
        hg_path,
        revlog_open_options,
    )?;
    let fs_len = fs_metadata.len();
    let file_node = entry.node_id()?;
    let filelog_entry = filelog.entry_for_node(file_node).map_err(|_| {
        HgError::corrupted(format!(
            "filelog {:?} missing node {:?} from manifest",
            hg_path, file_node
        ))
    })?;
    if filelog_entry.file_data_len_not_equal_to(fs_len) {
        // No need to read file contents:
        // it cannot be equal if it has a different length.
        return Ok(UnsureOutcome::Modified);
    }

    let p1_filelog_data = filelog_entry.data()?;
    let p1_contents = p1_filelog_data.file_data()?;
    if p1_contents.len() as u64 != fs_len {
        // No need to read file contents:
        // it cannot be equal if it has a different length.
        return Ok(UnsureOutcome::Modified);
    }

    let fs_contents = if is_symlink {
        get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
    } else {
        vfs.read(fs_path)?
    };

    Ok(if p1_contents != &*fs_contents {
        UnsureOutcome::Modified
    } else {
        UnsureOutcome::Clean
    })
}