# HG changeset patch # User Raphaël Gomès # Date 1667387134 -3600 # Node ID 363923bd51cdd83a04571707e2d2f4c213dffef7 # Parent ca19335e86e586d4efa801317af5b8b48b9796e8 dirstate-v2: hash the source of the ignore patterns as well Fixes the test introduced in the last changeset. This caused the hash to change, which means that the check in the test had to be adapted. Since this hash is only done as a caching mechanism, invalidation does not pose any backwards compatibility issues. diff -r ca19335e86e5 -r 363923bd51cd mercurial/helptext/internals/dirstate-v2.txt --- a/mercurial/helptext/internals/dirstate-v2.txt Wed Nov 02 15:24:39 2022 +0100 +++ b/mercurial/helptext/internals/dirstate-v2.txt Wed Nov 02 12:05:34 2022 +0100 @@ -283,8 +283,16 @@ in inclusion order. This definition is recursive, as included files can themselves include more files. -This hash is defined as the SHA-1 of the concatenation (in sorted -order) of the "expanded contents" of each "root" ignore file. +* "filepath" as the bytes of the ignore file path + relative to the root of the repository if inside the repository, + or the untouched path as defined in the configuration. + +This hash is defined as the SHA-1 of the following line format: + +<filepath> <sha1 of the "expanded contents">\n + +for each "root" ignore file. (in sorted order) + (Note that computing this does not require actually concatenating into a single contiguous byte sequence. 
Instead a SHA-1 hasher object can be created diff -r ca19335e86e5 -r 363923bd51cd rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs Wed Nov 02 15:24:39 2022 +0100 +++ b/rust/hg-core/src/dirstate_tree/status.rs Wed Nov 02 12:05:34 2022 +0100 @@ -10,6 +10,7 @@ use crate::matchers::get_ignore_function; use crate::matchers::Matcher; use crate::utils::files::get_bytes_from_os_string; +use crate::utils::files::get_bytes_from_path; use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::BadMatch; @@ -66,7 +67,7 @@ let (ignore_fn, warnings) = get_ignore_function( ignore_files, &root_dir, - &mut |_pattern_bytes| {}, + &mut |_source, _pattern_bytes| {}, )?; (ignore_fn, warnings, None) } @@ -75,7 +76,24 @@ let (ignore_fn, warnings) = get_ignore_function( ignore_files, &root_dir, - &mut |pattern_bytes| hasher.update(pattern_bytes), + &mut |source, pattern_bytes| { + // If inside the repo, use the relative version to + // make it deterministic inside tests. + // The performance hit should be negligible. 
+ let source = source + .strip_prefix(&root_dir) + .unwrap_or(source); + let source = get_bytes_from_path(source); + + let mut subhasher = Sha1::new(); + subhasher.update(pattern_bytes); + let patterns_hash = subhasher.finalize(); + + hasher.update(source); + hasher.update(b" "); + hasher.update(patterns_hash); + hasher.update(b"\n"); + }, )?; let new_hash = *hasher.finalize().as_ref(); let changed = new_hash != dmap.ignore_patterns_hash; diff -r ca19335e86e5 -r 363923bd51cd rust/hg-core/src/filepatterns.rs --- a/rust/hg-core/src/filepatterns.rs Wed Nov 02 15:24:39 2022 +0100 +++ b/rust/hg-core/src/filepatterns.rs Wed Nov 02 12:05:34 2022 +0100 @@ -412,11 +412,11 @@ pub fn read_pattern_file( file_path: &Path, warn: bool, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), ) -> Result<(Vec, Vec), PatternError> { match std::fs::read(file_path) { Ok(contents) => { - inspect_pattern_bytes(&contents); + inspect_pattern_bytes(file_path, &contents); parse_pattern_file_contents(&contents, file_path, None, warn) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( @@ -455,7 +455,7 @@ pub fn get_patterns_from_file( pattern_file: &Path, root_dir: &Path, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), ) -> PatternResult<(Vec, Vec)> { let (patterns, mut warnings) = read_pattern_file(pattern_file, true, inspect_pattern_bytes)?; diff -r ca19335e86e5 -r 363923bd51cd rust/hg-core/src/matchers.rs --- a/rust/hg-core/src/matchers.rs Wed Nov 02 15:24:39 2022 +0100 +++ b/rust/hg-core/src/matchers.rs Wed Nov 02 12:05:34 2022 +0100 @@ -838,7 +838,7 @@ pub fn get_ignore_matcher<'a>( mut all_pattern_files: Vec, root_dir: &Path, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), ) -> PatternResult<(IncludeMatcher<'a>, Vec)> { let mut all_patterns = vec![]; let mut all_warnings = vec![]; @@ -871,7 +871,7 @@ pub fn 
get_ignore_function<'a>( all_pattern_files: Vec, root_dir: &Path, - inspect_pattern_bytes: &mut impl FnMut(&[u8]), + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), ) -> PatternResult<(IgnoreFnType<'a>, Vec)> { let res = get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes); diff -r ca19335e86e5 -r 363923bd51cd rust/rhg/src/commands/debugignorerhg.rs --- a/rust/rhg/src/commands/debugignorerhg.rs Wed Nov 02 15:24:39 2022 +0100 +++ b/rust/rhg/src/commands/debugignorerhg.rs Wed Nov 02 12:05:34 2022 +0100 @@ -25,7 +25,7 @@ let (ignore_matcher, warnings) = get_ignore_matcher( vec![ignore_file], &repo.working_directory_path().to_owned(), - &mut |_pattern_bytes| (), + &mut |_source, _pattern_bytes| (), ) .map_err(|e| StatusError::from(e))?; diff -r ca19335e86e5 -r 363923bd51cd tests/test-hgignore.t --- a/tests/test-hgignore.t Wed Nov 02 15:24:39 2022 +0100 +++ b/tests/test-hgignore.t Wed Nov 02 12:05:34 2022 +0100 @@ -421,18 +421,24 @@ Check the hash of ignore patterns written in the dirstate This is an optimization that is only relevant when using the Rust extensions + $ cat_filename_and_hash () { + > for i in "$@"; do + > printf "$i " + > cat "$i" | "$TESTDIR"/f --raw-sha1 | sed 's/^raw-sha1=//' + > done + > } $ hg status > /dev/null - $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 - sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff + $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 + sha1=c0beb296395d48ced8e14f39009c4ea6e409bfe6 $ hg debugstate --docket | grep ignore - ignore pattern hash: 6e315b60f15fb5dfa02be00f3e2c8f923051f5ff + ignore pattern hash: c0beb296395d48ced8e14f39009c4ea6e409bfe6 $ echo rel > .hg/testhgignorerel $ hg status > /dev/null - $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 - 
sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e + $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 + sha1=b8e63d3428ec38abc68baa27631516d5ec46b7fa $ hg debugstate --docket | grep ignore - ignore pattern hash: dea19cc7119213f24b6b582a4bae7b0cb063e34e + ignore pattern hash: b8e63d3428ec38abc68baa27631516d5ec46b7fa $ cd .. Check that the hash depends on the source of the hgignore patterns @@ -460,6 +466,6 @@ $ hg status M dir1/.hgignore M dir2/.hgignore - ? dir1/subdir/ignored1 (missing-correct-output !) + ? dir1/subdir/ignored1 #endif