Mercurial > hg
changeset 47335:ed1583a845d2
dirstate-v2: Make more APIs fallible, returning Result
When parsing becomes lazy, parse error will potentially happen in more places.
This propagates such errors to callers.
Differential Revision: https://phab.mercurial-scm.org/D10749
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 13:15:00 +0200 |
parents | 18b3060fe598 |
children | 8d0260d0dbc9 |
files | rust/hg-core/src/dirstate.rs rust/hg-core/src/dirstate/dirs_multiset.rs rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/dirstate/parsers.rs rust/hg-core/src/dirstate/status.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/dispatch.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/dirstate_tree/status.rs rust/hg-core/src/lib.rs rust/hg-cpython/src/dirstate/copymap.rs rust/hg-cpython/src/dirstate/dirs_multiset.rs rust/hg-cpython/src/dirstate/dirstate_map.rs rust/hg-cpython/src/dirstate/dispatch.rs rust/hg-cpython/src/dirstate/non_normal_entries.rs |
diffstat | 15 files changed, 636 insertions(+), 336 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate.rs Wed May 19 13:15:00 2021 +0200 @@ -5,6 +5,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. +use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::errors::HgError; use crate::revlog::Node; use crate::utils::hg_path::{HgPath, HgPathBuf}; @@ -76,12 +77,19 @@ pub const SIZE_FROM_OTHER_PARENT: i32 = -2; pub type StateMap = FastHashMap<HgPathBuf, DirstateEntry>; -pub type StateMapIter<'a> = - Box<dyn Iterator<Item = (&'a HgPath, DirstateEntry)> + Send + 'a>; +pub type StateMapIter<'a> = Box< + dyn Iterator< + Item = Result<(&'a HgPath, DirstateEntry), DirstateV2ParseError>, + > + Send + + 'a, +>; pub type CopyMap = FastHashMap<HgPathBuf, HgPathBuf>; -pub type CopyMapIter<'a> = - Box<dyn Iterator<Item = (&'a HgPath, &'a HgPath)> + Send + 'a>; +pub type CopyMapIter<'a> = Box< + dyn Iterator<Item = Result<(&'a HgPath, &'a HgPath), DirstateV2ParseError>> + + Send + + 'a, +>; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum EntryState {
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate/dirs_multiset.rs Wed May 19 13:15:00 2021 +0200 @@ -8,13 +8,14 @@ //! A multiset of directory names. //! //! Used to counts the references to directories in a manifest or dirstate. +use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::{ dirstate::EntryState, utils::{ files, hg_path::{HgPath, HgPathBuf, HgPathError}, }, - DirstateEntry, DirstateMapError, FastHashMap, + DirstateEntry, DirstateError, DirstateMapError, FastHashMap, }; use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet}; @@ -33,15 +34,18 @@ pub fn from_dirstate<I, P>( dirstate: I, skip_state: Option<EntryState>, - ) -> Result<Self, DirstateMapError> + ) -> Result<Self, DirstateError> where - I: IntoIterator<Item = (P, DirstateEntry)>, + I: IntoIterator< + Item = Result<(P, DirstateEntry), DirstateV2ParseError>, + >, P: AsRef<HgPath>, { let mut multiset = DirsMultiset { inner: FastHashMap::default(), }; - for (filename, entry) in dirstate { + for item in dirstate { + let (filename, entry) = item?; let filename = filename.as_ref(); // This `if` is optimized out of the loop if let Some(skip) = skip_state { @@ -337,8 +341,11 @@ }; assert_eq!(expected, new); - let new = - DirsMultiset::from_dirstate(StateMap::default(), None).unwrap(); + let new = DirsMultiset::from_dirstate( + StateMap::default().into_iter().map(Ok), + None, + ) + .unwrap(); let expected = DirsMultiset { inner: FastHashMap::default(), }; @@ -362,20 +369,17 @@ }; assert_eq!(expected, new); - let input_map: HashMap<_, _> = ["b/x", "a/c", "a/d/x"] - .iter() - .map(|f| { - ( - HgPathBuf::from_bytes(f.as_bytes()), - DirstateEntry { - state: EntryState::Normal, - mode: 0, - mtime: 0, - size: 0, - }, - ) - }) - .collect(); + let input_map = ["b/x", "a/c", "a/d/x"].iter().map(|f| { + Ok(( + HgPathBuf::from_bytes(f.as_bytes()), + DirstateEntry { + state: EntryState::Normal, + mode: 0, + mtime: 0, + size: 0, + }, + )) + }); let expected_inner = [("", 2), ("a", 2), ("b", 1), ("a/d", 1)] .iter() .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) @@ -390,7 +394,7 @@ #[test] fn test_dirsmultiset_new_skip() { - let input_map: HashMap<_, _> = [ + let input_map = [ ("a/", EntryState::Normal), ("a/b", EntryState::Normal), ("a/c", EntryState::Removed), @@ -398,7 +402,7 @@ ] .iter() .map(|(f, state)| { - ( + Ok(( HgPathBuf::from_bytes(f.as_bytes()), DirstateEntry { state: *state, @@ -406,9 +410,8 @@ mtime: 0, size: 0, }, - ) - }) - .collect(); + )) + }); // "a" incremented with "a/c" and "a/d/" let expected_inner = [("", 1), ("a", 2)]
--- a/rust/hg-core/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -10,8 +10,8 @@ dirstate::EntryState, pack_dirstate, parse_dirstate, utils::hg_path::{HgPath, HgPathBuf}, - CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError, - DirstateParents, StateMap, + CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateParents, + StateMap, }; use micro_timer::timed; use std::collections::HashSet; @@ -66,7 +66,7 @@ filename: &HgPath, old_state: EntryState, entry: DirstateEntry, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { if old_state == EntryState::Unknown || old_state == EntryState::Removed { if let Some(ref mut dirs) = self.dirs { @@ -104,7 +104,7 @@ filename: &HgPath, old_state: EntryState, size: i32, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { if old_state != EntryState::Unknown && old_state != EntryState::Removed { if let Some(ref mut dirs) = self.dirs { @@ -138,7 +138,7 @@ &mut self, filename: &HgPath, old_state: EntryState, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { let exists = self.state_map.remove(filename).is_some(); if exists { @@ -246,20 +246,20 @@ /// emulate a Python lazy property, but it is ugly and unidiomatic. /// TODO One day, rewriting this struct using the typestate might be a /// good idea. - pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + pub fn set_all_dirs(&mut self) -> Result<(), DirstateError> { if self.all_dirs.is_none() { self.all_dirs = Some(DirsMultiset::from_dirstate( - self.state_map.iter().map(|(k, v)| (k, *v)), + self.state_map.iter().map(|(k, v)| Ok((k, *v))), None, )?); } Ok(()) } - pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + pub fn set_dirs(&mut self) -> Result<(), DirstateError> { if self.dirs.is_none() { self.dirs = Some(DirsMultiset::from_dirstate( - self.state_map.iter().map(|(k, v)| (k, *v)), + self.state_map.iter().map(|(k, v)| Ok((k, *v))), Some(EntryState::Removed), )?); } @@ -269,7 +269,7 @@ pub fn has_tracked_dir( &mut self, directory: &HgPath, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { self.set_dirs()?; Ok(self.dirs.as_ref().unwrap().contains(directory)) } @@ -277,7 +277,7 @@ pub fn has_dir( &mut self, directory: &HgPath, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { self.set_all_dirs()?; Ok(self.all_dirs.as_ref().unwrap().contains(directory)) }
--- a/rust/hg-core/src/dirstate/parsers.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate/parsers.rs Wed May 19 13:15:00 2021 +0200 @@ -43,13 +43,18 @@ copies.push((path, source)); } entries.push((path, *entry)); + Ok(()) })?; Ok((parents, entries, copies)) } pub fn parse_dirstate_entries<'a>( mut contents: &'a [u8], - mut each_entry: impl FnMut(&'a HgPath, &DirstateEntry, Option<&'a HgPath>), + mut each_entry: impl FnMut( + &'a HgPath, + &DirstateEntry, + Option<&'a HgPath>, + ) -> Result<(), HgError>, ) -> Result<&'a DirstateParents, HgError> { let (parents, rest) = DirstateParents::from_bytes(contents) .map_err(|_| HgError::corrupted("Too little data for dirstate."))?; @@ -75,7 +80,7 @@ iter.next().expect("splitn always yields at least one item"), ); let copy_source = iter.next().map(HgPath::new); - each_entry(path, &entry, copy_source); + each_entry(path, &entry, copy_source)?; contents = rest; }
--- a/rust/hg-core/src/dirstate/status.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate/status.rs Wed May 19 13:15:00 2021 +0200 @@ -9,6 +9,7 @@ //! It is currently missing a lot of functionality compared to the Python one //! and will only be triggered in narrow cases. +use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::utils::path_auditor::PathAuditor; use crate::{ dirstate::SIZE_FROM_OTHER_PARENT, @@ -302,6 +303,8 @@ Path(HgPathError), /// An invalid "ignore" pattern was found Pattern(PatternError), + /// Corrupted dirstate + DirstateV2ParseError(DirstateV2ParseError), } pub type StatusResult<T> = Result<T, StatusError>; @@ -312,6 +315,9 @@ StatusError::IO(error) => error.fmt(f), StatusError::Path(error) => error.fmt(f), StatusError::Pattern(error) => error.fmt(f), + StatusError::DirstateV2ParseError(_) => { + f.write_str("dirstate-v2 parse error") + } } } }
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -5,6 +5,7 @@ use std::path::PathBuf; use super::on_disk; +use super::on_disk::DirstateV2ParseError; use super::path_with_basename::WithBasename; use crate::dirstate::parsers::pack_entry; use crate::dirstate::parsers::packed_entry_size; @@ -15,7 +16,6 @@ use crate::CopyMapIter; use crate::DirstateEntry; use crate::DirstateError; -use crate::DirstateMapError; use crate::DirstateParents; use crate::DirstateStatus; use crate::EntryState; @@ -81,9 +81,12 @@ pub(super) fn make_mut( &mut self, - ) -> &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>> { + ) -> Result< + &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>, + DirstateV2ParseError, + > { match self { - ChildNodes::InMemory(nodes) => nodes, + ChildNodes::InMemory(nodes) => Ok(nodes), } } } @@ -92,11 +95,11 @@ pub(super) fn get( &self, base_name: &HgPath, - ) -> Option<NodeRef<'tree, 'on_disk>> { + ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> { match self { - ChildNodesRef::InMemory(nodes) => nodes + ChildNodesRef::InMemory(nodes) => Ok(nodes .get_key_value(base_name) - .map(|(k, v)| NodeRef::InMemory(k, v)), + .map(|(k, v)| NodeRef::InMemory(k, v))), } } @@ -131,9 +134,14 @@ .iter() .map(|(k, v)| NodeRef::InMemory(k, v)) .collect(); + fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath { + match node { + NodeRef::InMemory(path, _node) => path.base_name(), + } + } // `sort_unstable_by_key` doesn’t allow keys borrowing from the // value: https://github.com/rust-lang/rust/issues/34162 - vec.sort_unstable_by(|a, b| a.base_name().cmp(b.base_name())); + vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b))); vec } } @@ -141,35 +149,51 @@ } impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> { - pub(super) fn full_path(&self) -> &'tree HgPath { + pub(super) fn full_path( + &self, + ) -> Result<&'tree HgPath, DirstateV2ParseError> { match self { - NodeRef::InMemory(path, _node) => path.full_path(), + NodeRef::InMemory(path, _node) => Ok(path.full_path()), } } /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree - pub(super) fn full_path_cow(&self) -> Cow<'on_disk, HgPath> { + pub(super) fn full_path_cow( + &self, + ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> { match self { - NodeRef::InMemory(path, _node) => path.full_path().clone(), + NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()), + } + } + + pub(super) fn base_name( + &self, + ) -> Result<&'tree HgPath, DirstateV2ParseError> { + match self { + NodeRef::InMemory(path, _node) => Ok(path.base_name()), } } - pub(super) fn base_name(&self) -> &'tree HgPath { + pub(super) fn children( + &self, + ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> { match self { - NodeRef::InMemory(path, _node) => path.base_name(), + NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()), } } - pub(super) fn children(&self) -> ChildNodesRef<'tree, 'on_disk> { + pub(super) fn has_copy_source(&self) -> bool { match self { - NodeRef::InMemory(_path, node) => node.children.as_ref(), + NodeRef::InMemory(_path, node) => node.copy_source.is_some(), } } - pub(super) fn copy_source(&self) -> Option<&'tree HgPath> { + pub(super) fn copy_source( + &self, + ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> { match self { NodeRef::InMemory(_path, node) => { - node.copy_source.as_ref().map(|s| &**s) + Ok(node.copy_source.as_ref().map(|s| &**s)) } } } @@ -180,15 +204,20 @@ } } - pub(super) fn entry(&self) -> Option<DirstateEntry> { + pub(super) fn entry( + &self, + ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { match self { - NodeRef::InMemory(_path, node) => node.entry, + NodeRef::InMemory(_path, node) => Ok(node.entry), } } - pub(super) fn state(&self) -> Option<EntryState> { + + pub(super) fn state( + &self, + ) -> Result<Option<EntryState>, DirstateV2ParseError> { match self { NodeRef::InMemory(_path, node) => { - node.entry.as_ref().map(|entry| entry.state) + Ok(node.entry.as_ref().map(|entry| entry.state)) } } } @@ -253,7 +282,7 @@ ancestor.tracked_descendants_count += 1 } }, - ); + )?; assert!( node.entry.is_none(), "duplicate dirstate entry in read" @@ -268,6 +297,7 @@ if copy_source.is_some() { map.nodes_with_copy_source_count += 1 } + Ok(()) }, )?; let parents = Some(parents.clone()); @@ -278,18 +308,21 @@ fn get_node<'tree>( &'tree self, path: &HgPath, - ) -> Option<NodeRef<'tree, 'on_disk>> { + ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> { let mut children = self.root.as_ref(); let mut components = path.components(); let mut component = components.next().expect("expected at least one components"); loop { - let child = children.get(component)?; - if let Some(next_component) = components.next() { - component = next_component; - children = child.children(); + if let Some(child) = children.get(component)? { + if let Some(next_component) = components.next() { + component = next_component; + children = child.children()?; + } else { + return Ok(Some(child)); + } } else { - return Some(child); + return Ok(None); } } } @@ -301,18 +334,21 @@ fn get_node_mut<'tree>( root: &'tree mut ChildNodes<'on_disk>, path: &HgPath, - ) -> Option<&'tree mut Node<'on_disk>> { + ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> { let mut children = root; let mut components = path.components(); let mut component = components.next().expect("expected at least one components"); loop { - let child = children.make_mut().get_mut(component)?; - if let Some(next_component) = components.next() { - component = next_component; - children = &mut child.children; + if let Some(child) = children.make_mut()?.get_mut(component) { + if let Some(next_component) = components.next() { + component = next_component; + children = &mut child.children; + } else { + return Ok(Some(child)); + } } else { - return Some(child); + return Ok(None); } } } @@ -324,7 +360,7 @@ WithBasename<&'path HgPath>, ) -> WithBasename<Cow<'on_disk, HgPath>>, mut each_ancestor: impl FnMut(&mut Node), - ) -> &'tree mut Node<'on_disk> { + ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { let mut child_nodes = root; let mut inclusive_ancestor_paths = WithBasename::inclusive_ancestors_of(path); @@ -336,7 +372,7 @@ // map already contains that key, without introducing double // lookup? let child_node = child_nodes - .make_mut() + .make_mut()? .entry(to_cow(ancestor_path)) .or_default(); if let Some(next) = inclusive_ancestor_paths.next() { @@ -344,7 +380,7 @@ ancestor_path = next; child_nodes = &mut child_node.children; } else { - return child_node; + return Ok(child_node); } } } @@ -354,7 +390,7 @@ path: &HgPath, old_state: EntryState, new_entry: DirstateEntry, - ) { + ) -> Result<(), DirstateV2ParseError> { let tracked_count_increment = match (old_state.is_tracked(), new_entry.state.is_tracked()) { (false, true) => 1, @@ -376,16 +412,19 @@ _ => {} } }, - ); + )?; if node.entry.is_none() { self.nodes_with_entry_count += 1 } - node.entry = Some(new_entry) + node.entry = Some(new_entry); + Ok(()) } fn iter_nodes<'tree>( &'tree self, - ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> + 'tree { + ) -> impl Iterator< + Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>, + > + 'tree { // Depth first tree traversal. // // If we could afford internal iteration and recursion, @@ -409,8 +448,12 @@ let mut iter = self.root.as_ref().iter(); std::iter::from_fn(move || { while let Some(child_node) = iter.next() { + let children = match child_node.children() { + Ok(children) => children, + Err(error) => return Some(Err(error)), + }; // Pseudo-recursion - let new_iter = child_node.children().iter(); + let new_iter = children.iter(); let old_iter = std::mem::replace(&mut iter, new_iter); stack.push((child_node, old_iter)); } @@ -420,7 +463,7 @@ // explicit stack iter = next_iter; - Some(child_node) + Some(Ok(child_node)) } else { // Reached the bottom of the stack, we’re done None @@ -428,17 +471,62 @@ }) } - fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) { + fn clear_known_ambiguous_mtimes( + &mut self, + paths: &[impl AsRef<HgPath>], + ) -> Result<(), DirstateV2ParseError> { for path in paths { if let Some(node) = - Self::get_node_mut(&mut self.root, path.as_ref()) + Self::get_node_mut(&mut self.root, path.as_ref())? { if let Some(entry) = node.entry.as_mut() { entry.clear_mtime(); } } } + Ok(()) } + + /// Return a faillilble iterator of full paths of nodes that have an + /// `entry` for which the given `predicate` returns true. + /// + /// Fallibility means that each iterator item is a `Result`, which may + /// indicate a parse error of the on-disk dirstate-v2 format. Such errors + /// should only happen if Mercurial is buggy or a repository is corrupted. + fn filter_full_paths<'tree>( + &'tree self, + predicate: impl Fn(&DirstateEntry) -> bool + 'tree, + ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree + { + filter_map_results(self.iter_nodes(), move |node| { + if let Some(entry) = node.entry()? { + if predicate(&entry) { + return Ok(Some(node.full_path()?)); + } + } + Ok(None) + }) + } +} + +/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. +/// +/// The callback is only called for incoming `Ok` values. Errors are passed +/// through as-is. In order to let it use the `?` operator the callback is +/// expected to return a `Result` of `Option`, instead of an `Option` of +/// `Result`. +fn filter_map_results<'a, I, F, A, B, E>( + iter: I, + f: F, +) -> impl Iterator<Item = Result<B, E>> + 'a +where + I: Iterator<Item = Result<A, E>> + 'a, + F: Fn(A) -> Result<Option<B>, E> + 'a, +{ + iter.filter_map(move |result| match result { + Ok(node) => f(node).transpose(), + Err(e) => Some(Err(e)), + }) } impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> { @@ -453,9 +541,8 @@ filename: &HgPath, old_state: EntryState, entry: DirstateEntry, - ) -> Result<(), DirstateMapError> { - self.add_or_remove_file(filename, old_state, entry); - Ok(()) + ) -> Result<(), DirstateError> { + Ok(self.add_or_remove_file(filename, old_state, entry)?) } fn remove_file( @@ -463,36 +550,48 @@ filename: &HgPath, old_state: EntryState, size: i32, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { let entry = DirstateEntry { state: EntryState::Removed, mode: 0, size, mtime: 0, }; - self.add_or_remove_file(filename, old_state, entry); - Ok(()) + Ok(self.add_or_remove_file(filename, old_state, entry)?) } fn drop_file( &mut self, filename: &HgPath, old_state: EntryState, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { struct Dropped { was_tracked: bool, had_entry: bool, had_copy_source: bool, } - fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> { + fn recur( + nodes: &mut ChildNodes, + path: &HgPath, + ) -> Result<Option<Dropped>, DirstateV2ParseError> { let (first_path_component, rest_of_path) = path.split_first_component(); - let node = nodes.make_mut().get_mut(first_path_component)?; + let node = if let Some(node) = + nodes.make_mut()?.get_mut(first_path_component) + { + node + } else { + return Ok(None); + }; let dropped; if let Some(rest) = rest_of_path { - dropped = recur(&mut node.children, rest)?; - if dropped.was_tracked { - node.tracked_descendants_count -= 1; + if let Some(d) = recur(&mut node.children, rest)? { + dropped = d; + if dropped.was_tracked { + node.tracked_descendants_count -= 1; + } + } else { + return Ok(None); } } else { dropped = Dropped { @@ -510,12 +609,12 @@ && node.copy_source.is_none() && node.children.is_empty() { - nodes.make_mut().remove(first_path_component); + nodes.make_mut()?.remove(first_path_component); } - Some(dropped) + Ok(Some(dropped)) } - if let Some(dropped) = recur(&mut self.root, filename) { + if let Some(dropped) = recur(&mut self.root, filename)? { if dropped.had_entry { self.nodes_with_entry_count -= 1 } @@ -529,20 +628,31 @@ } } - fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { + fn clear_ambiguous_times( + &mut self, + filenames: Vec<HgPathBuf>, + now: i32, + ) -> Result<(), DirstateV2ParseError> { for filename in filenames { - if let Some(node) = Self::get_node_mut(&mut self.root, &filename) { + if let Some(node) = Self::get_node_mut(&mut self.root, &filename)? + { if let Some(entry) = node.entry.as_mut() { entry.clear_ambiguous_mtime(now); } } } + Ok(()) } - fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool { - self.get_node(key) - .and_then(|node| node.entry()) - .map_or(false, |entry| entry.is_non_normal()) + fn non_normal_entries_contains( + &mut self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { + Ok(if let Some(node) = self.get_node(key)? { + node.entry()?.map_or(false, |entry| entry.is_non_normal()) + } else { + false + }) } fn non_normal_entries_remove(&mut self, _key: &HgPath) { @@ -552,13 +662,10 @@ fn non_normal_or_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + '_> { - Box::new(self.iter_nodes().filter_map(|node| { - node.entry() - .filter(|entry| { - entry.is_non_normal() || entry.is_from_other_parent() - }) - .map(|_| node.full_path()) + ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_> + { + Box::new(self.filter_full_paths(|entry| { + entry.is_non_normal() || entry.is_from_other_parent() })) } @@ -569,35 +676,33 @@ fn iter_non_normal_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { self.iter_non_normal_paths_panic() } fn iter_non_normal_paths_panic( &self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { - Box::new(self.iter_nodes().filter_map(|node| { - node.entry() - .filter(|entry| entry.is_non_normal()) - .map(|_| node.full_path()) - })) + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { + Box::new(self.filter_full_paths(|entry| entry.is_non_normal())) } fn iter_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { - Box::new(self.iter_nodes().filter_map(|node| { - node.entry() - .filter(|entry| entry.is_from_other_parent()) - .map(|_| node.full_path()) - })) + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { + Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent())) } fn has_tracked_dir( &mut self, directory: &HgPath, - ) -> Result<bool, DirstateMapError> { - if let Some(node) = self.get_node(directory) { + ) -> Result<bool, DirstateError> { + if let Some(node) = self.get_node(directory)? { // A node without a `DirstateEntry` was created to hold child // nodes, and is therefore a directory. Ok(!node.has_entry() && node.tracked_descendants_count() > 0) @@ -606,11 +711,8 @@ } } - fn has_dir( - &mut self, - directory: &HgPath, - ) -> Result<bool, DirstateMapError> { - if let Some(node) = self.get_node(directory) { + fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> { + if let Some(node) = self.get_node(directory)? { // A node without a `DirstateEntry` was created to hold child // nodes, and is therefore a directory. Ok(!node.has_entry()) @@ -631,25 +733,27 @@ // reallocations let mut size = parents.as_bytes().len(); for node in self.iter_nodes() { - if let Some(entry) = node.entry() { + let node = node?; + if let Some(entry) = node.entry()? { size += - packed_entry_size(node.full_path(), node.copy_source()); + packed_entry_size(node.full_path()?, node.copy_source()?); if entry.mtime_is_ambiguous(now) { - ambiguous_mtimes.push(node.full_path_cow()) + ambiguous_mtimes.push(node.full_path_cow()?) } } } - self.clear_known_ambiguous_mtimes(&ambiguous_mtimes); + self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?; let mut packed = Vec::with_capacity(size); packed.extend(parents.as_bytes()); for node in self.iter_nodes() { - if let Some(entry) = node.entry() { + let node = node?; + if let Some(entry) = node.entry()? { pack_entry( - node.full_path(), + node.full_path()?, &entry, - node.copy_source(), + node.copy_source()?, &mut packed, ); } @@ -667,26 +771,27 @@ let now: i32 = now.0.try_into().expect("time overflow"); let mut paths = Vec::new(); for node in self.iter_nodes() { - if let Some(entry) = node.entry() { + let node = node?; + if let Some(entry) = node.entry()? { if entry.mtime_is_ambiguous(now) { - paths.push(node.full_path_cow()) + paths.push(node.full_path_cow()?) } } } // Borrow of `self` ends here since we collect cloned paths - self.clear_known_ambiguous_mtimes(&paths); + self.clear_known_ambiguous_mtimes(&paths)?; on_disk::write(self, parents) } - fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + fn set_all_dirs(&mut self) -> Result<(), DirstateError> { // Do nothing, this `DirstateMap` does not a separate `all_dirs` that // needs to be recomputed Ok(()) } - fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + fn set_dirs(&mut self) -> Result<(), DirstateError> { // Do nothing, this `DirstateMap` does not a separate `dirs` that needs // to be recomputed Ok(()) @@ -708,66 +813,97 @@ } fn copy_map_iter(&self) -> CopyMapIter<'_> { - Box::new(self.iter_nodes().filter_map(|node| { - node.copy_source() - .map(|copy_source| (node.full_path(), copy_source)) + Box::new(filter_map_results(self.iter_nodes(), |node| { + Ok(if let Some(source) = node.copy_source()? { + Some((node.full_path()?, source)) + } else { + None + }) })) } - fn copy_map_contains_key(&self, key: &HgPath) -> bool { - if let Some(node) = self.get_node(key) { - node.copy_source().is_some() + fn copy_map_contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { + Ok(if let Some(node) = self.get_node(key)? { + node.has_copy_source() } else { false - } + }) } - fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> { - self.get_node(key)?.copy_source() + fn copy_map_get( + &self, + key: &HgPath, + ) -> Result<Option<&HgPath>, DirstateV2ParseError> { + if let Some(node) = self.get_node(key)? { + if let Some(source) = node.copy_source()? { + return Ok(Some(source)); + } + } + Ok(None) } - fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> { + fn copy_map_remove( + &mut self, + key: &HgPath, + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { let count = &mut self.nodes_with_copy_source_count; - Self::get_node_mut(&mut self.root, key).and_then(|node| { + Ok(Self::get_node_mut(&mut self.root, key)?.and_then(|node| { if node.copy_source.is_some() { *count -= 1 } node.copy_source.take().map(Cow::into_owned) - }) + })) } fn copy_map_insert( &mut self, key: HgPathBuf, value: HgPathBuf, - ) -> Option<HgPathBuf> { + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { let node = Self::get_or_insert_node( &mut self.root, &key, WithBasename::to_cow_owned, |_ancestor| {}, - ); + )?; if node.copy_source.is_none() { self.nodes_with_copy_source_count += 1 } - node.copy_source.replace(value.into()).map(Cow::into_owned) + Ok(node.copy_source.replace(value.into()).map(Cow::into_owned)) } fn len(&self) -> usize { self.nodes_with_entry_count as usize } - fn contains_key(&self, key: &HgPath) -> bool { - self.get(key).is_some() + fn contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { + Ok(self.get(key)?.is_some()) } - fn get(&self, key: &HgPath) -> Option<DirstateEntry> { - self.get_node(key)?.entry() + fn get( + &self, + key: &HgPath, + ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { + Ok(if let Some(node) = self.get_node(key)? { + node.entry()? + } else { + None + }) } fn iter(&self) -> StateMapIter<'_> { - Box::new(self.iter_nodes().filter_map(|node| { - node.entry().map(|entry| (node.full_path(), entry)) + Box::new(filter_map_results(self.iter_nodes(), |node| { + Ok(if let Some(entry) = node.entry()? { + Some((node.full_path()?, entry)) + } else { + None + }) })) } }
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs Wed May 19 13:15:00 2021 +0200 @@ -1,13 +1,13 @@ use std::path::PathBuf; use crate::dirstate::parsers::Timestamp; +use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::matchers::Matcher; use crate::utils::hg_path::{HgPath, HgPathBuf}; use crate::CopyMapIter; use crate::DirstateEntry; use crate::DirstateError; use crate::DirstateMap; -use crate::DirstateMapError; use crate::DirstateParents; use crate::DirstateStatus; use crate::EntryState; @@ -24,54 +24,64 @@ filename: &HgPath, old_state: EntryState, entry: DirstateEntry, - ) -> Result<(), DirstateMapError>; + ) -> Result<(), DirstateError>; fn remove_file( &mut self, filename: &HgPath, old_state: EntryState, size: i32, - ) -> Result<(), DirstateMapError>; + ) -> Result<(), DirstateError>; fn drop_file( &mut self, filename: &HgPath, old_state: EntryState, - ) -> Result<bool, DirstateMapError>; + ) -> Result<bool, DirstateError>; - fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32); + fn clear_ambiguous_times( + &mut self, + filenames: Vec<HgPathBuf>, + now: i32, + ) -> Result<(), DirstateV2ParseError>; - fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool; + fn non_normal_entries_contains( + &mut self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError>; fn non_normal_entries_remove(&mut self, key: &HgPath); fn non_normal_or_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + '_>; + ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>; fn set_non_normal_other_parent_entries(&mut self, force: bool); fn iter_non_normal_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_>; + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + >; fn iter_non_normal_paths_panic( &self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_>; + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + >; fn iter_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_>; + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + >; fn has_tracked_dir( &mut self, directory: &HgPath, - ) -> Result<bool, DirstateMapError>; + ) -> Result<bool, DirstateError>; - fn has_dir( - &mut self, - directory: &HgPath, - ) -> Result<bool, DirstateMapError>; + fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError>; fn pack_v1( &mut self, @@ -85,9 +95,9 @@ now: Timestamp, ) -> Result<Vec<u8>, DirstateError>; - fn set_all_dirs(&mut self) -> Result<(), DirstateMapError>; + fn set_all_dirs(&mut self) -> Result<(), DirstateError>; - fn set_dirs(&mut self) -> Result<(), DirstateMapError>; + fn set_dirs(&mut self) -> Result<(), DirstateError>; fn status<'a>( &'a mut self, @@ -101,23 +111,36 @@ fn copy_map_iter(&self) -> CopyMapIter<'_>; - fn copy_map_contains_key(&self, key: &HgPath) -> bool; + fn copy_map_contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError>; - fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath>; + fn copy_map_get( + &self, + key: &HgPath, + ) -> Result<Option<&HgPath>, DirstateV2ParseError>; - fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf>; + fn copy_map_remove( + &mut self, + key: &HgPath, + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError>; fn copy_map_insert( &mut self, key: HgPathBuf, value: HgPathBuf, - ) -> Option<HgPathBuf>; + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError>; fn len(&self) -> usize; - fn contains_key(&self, key: &HgPath) -> bool; + fn contains_key(&self, key: &HgPath) + -> Result<bool, DirstateV2ParseError>; - fn get(&self, key: &HgPath) -> Option<DirstateEntry>; + fn get( + &self, + key: &HgPath, + ) -> Result<Option<DirstateEntry>, DirstateV2ParseError>; fn iter(&self) -> StateMapIter<'_>; } @@ -132,7 +155,7 @@ filename: &HgPath, old_state: EntryState, entry: DirstateEntry, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { self.add_file(filename, old_state, entry) } @@ -141,7 +164,7 @@ filename: &HgPath, old_state: EntryState, size: i32, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { self.remove_file(filename, old_state, size) } @@ -149,18 +172,25 @@ &mut self, filename: &HgPath, old_state: EntryState, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { self.drop_file(filename, old_state) } - fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { - self.clear_ambiguous_times(filenames, now) + fn clear_ambiguous_times( + &mut self, + filenames: Vec<HgPathBuf>, + now: i32, + ) -> Result<(), DirstateV2ParseError> { + Ok(self.clear_ambiguous_times(filenames, now)) } - fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool { + fn non_normal_entries_contains( + &mut self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { let (non_normal, _other_parent) = self.get_non_normal_other_parent_entries(); - non_normal.contains(key) + Ok(non_normal.contains(key)) } fn non_normal_entries_remove(&mut self, key: &HgPath) { @@ -169,10 +199,11 @@ fn non_normal_or_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + '_> { + ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_> + { let (non_normal, other_parent) = self.get_non_normal_other_parent_entries(); - Box::new(non_normal.union(other_parent).map(|p| &**p)) + Box::new(non_normal.union(other_parent).map(|p| Ok(&**p))) } fn set_non_normal_other_parent_entries(&mut self, force: bool) { @@ -181,39 +212,42 @@ fn iter_non_normal_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { let (non_normal, _other_parent) = self.get_non_normal_other_parent_entries(); - Box::new(non_normal.iter().map(|p| &**p)) + Box::new(non_normal.iter().map(|p| Ok(&**p))) } fn iter_non_normal_paths_panic( &self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { let (non_normal, _other_parent) = self.get_non_normal_other_parent_entries_panic(); - Box::new(non_normal.iter().map(|p| &**p)) + Box::new(non_normal.iter().map(|p| Ok(&**p))) } fn iter_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { let (_non_normal, other_parent) = self.get_non_normal_other_parent_entries(); - Box::new(other_parent.iter().map(|p| &**p)) + Box::new(other_parent.iter().map(|p| Ok(&**p))) } fn has_tracked_dir( &mut self, directory: &HgPath, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { self.has_tracked_dir(directory) } - fn has_dir( - &mut self, - directory: &HgPath, - ) -> Result<bool, DirstateMapError> { + fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> { self.has_dir(directory) } @@ -235,11 +269,11 @@ ) } - fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + fn set_all_dirs(&mut self) -> Result<(), DirstateError> { self.set_all_dirs() } - fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + fn set_dirs(&mut self) -> Result<(), DirstateError> { self.set_dirs() } @@ -259,42 +293,61 @@ } fn copy_map_iter(&self) -> CopyMapIter<'_> { - Box::new(self.copy_map.iter().map(|(key, value)| (&**key, &**value))) - } - - fn copy_map_contains_key(&self, key: &HgPath) -> bool { - self.copy_map.contains_key(key) + Box::new( + self.copy_map + .iter() + .map(|(key, value)| Ok((&**key, &**value))), + ) } - fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> { - self.copy_map.get(key).map(|p| &**p) + fn copy_map_contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { + Ok(self.copy_map.contains_key(key)) } - fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> { - self.copy_map.remove(key) + fn copy_map_get( + &self, + key: &HgPath, + ) -> Result<Option<&HgPath>, DirstateV2ParseError> { + Ok(self.copy_map.get(key).map(|p| &**p)) + } + + fn copy_map_remove( + &mut self, + key: &HgPath, + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { + Ok(self.copy_map.remove(key)) } fn copy_map_insert( &mut self, key: HgPathBuf, value: HgPathBuf, - ) -> Option<HgPathBuf> { - self.copy_map.insert(key, value) + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { + Ok(self.copy_map.insert(key, value)) } fn len(&self) -> usize { (&**self).len() } - fn contains_key(&self, key: &HgPath) -> bool { - (&**self).contains_key(key) + fn contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { + Ok((&**self).contains_key(key)) } - fn get(&self, key: &HgPath) -> Option<DirstateEntry> { - (&**self).get(key).cloned() + fn get( + &self, + key: &HgPath, + ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { + Ok((&**self).get(key).cloned()) } fn iter(&self) -> StateMapIter<'_> { - Box::new((&**self).iter().map(|(key, value)| (&**key, *value))) + Box::new((&**self).iter().map(|(key, value)| Ok((&**key, *value)))) } }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200 @@ -109,7 +109,10 @@ } /// Unexpected file format found in `.hg/dirstate` with the "v2" format. -pub(crate) struct DirstateV2ParseError; +/// +/// This should only happen if Mercurial is buggy or a repository is corrupted. +#[derive(Debug)] +pub struct DirstateV2ParseError; impl From<DirstateV2ParseError> for HgError { fn from(_: DirstateV2ParseError) -> Self { @@ -295,9 +298,9 @@ // First accumulate serialized nodes in a `Vec` let mut on_disk_nodes = Vec::with_capacity(nodes.len()); for node in nodes { - let children = write_nodes(node.children(), out)?; - let full_path = write_slice::<u8>(node.full_path().as_bytes(), out); - let copy_source = if let Some(source) = node.copy_source() { + let children = write_nodes(node.children()?, out)?; + let full_path = write_slice::<u8>(node.full_path()?.as_bytes(), out); + let copy_source = if let Some(source) = node.copy_source()? { write_slice::<u8>(source.as_bytes(), out) } else { Slice {
--- a/rust/hg-core/src/dirstate_tree/status.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/status.rs Wed May 19 13:15:00 2021 +0200 @@ -2,6 +2,7 @@ use crate::dirstate_tree::dirstate_map::ChildNodesRef; use crate::dirstate_tree::dirstate_map::DirstateMap; use crate::dirstate_tree::dirstate_map::NodeRef; +use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::matchers::get_ignore_function; use crate::matchers::Matcher; use crate::utils::files::get_bytes_from_os_string; @@ -60,7 +61,7 @@ hg_path, &root_dir, is_at_repo_root, - ); + )?; Ok((common.outcome.into_inner().unwrap(), warnings)) } @@ -97,7 +98,7 @@ directory_hg_path: &'tree HgPath, directory_fs_path: &Path, is_at_repo_root: bool, - ) { + ) -> Result<(), DirstateV2ParseError> { let mut fs_entries = if let Ok(entries) = self.read_dir( directory_hg_path, directory_fs_path, @@ -105,7 +106,7 @@ ) { entries } else { - return; + return Ok(()); }; // `merge_join_by` requires both its input iterators to be sorted: @@ -115,34 +116,41 @@ // https://github.com/rust-lang/rust/issues/34162 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); + // Propagate here any error that would happen inside the comparison + // callback below + for dirstate_node in &dirstate_nodes { + dirstate_node.base_name()?; + } itertools::merge_join_by( dirstate_nodes, &fs_entries, |dirstate_node, fs_entry| { - dirstate_node.base_name().cmp(&fs_entry.base_name) + // This `unwrap` never panics because we already propagated + // those errors above + dirstate_node.base_name().unwrap().cmp(&fs_entry.base_name) }, ) .par_bridge() - .for_each(|pair| { + .map(|pair| { use itertools::EitherOrBoth::*; match pair { - Both(dirstate_node, fs_entry) => { - self.traverse_fs_and_dirstate( + Both(dirstate_node, fs_entry) => self + .traverse_fs_and_dirstate( fs_entry, dirstate_node, has_ignored_ancestor, - ); - } + ), Left(dirstate_node) => { self.traverse_dirstate_only(dirstate_node) } - Right(fs_entry) => self.traverse_fs_only( + Right(fs_entry) => Ok(self.traverse_fs_only( has_ignored_ancestor, directory_hg_path, fs_entry, - ), + )), } }) + .collect() } fn traverse_fs_and_dirstate( @@ -150,8 +158,8 @@ fs_entry: &DirEntry, dirstate_node: NodeRef<'tree, '_>, has_ignored_ancestor: bool, - ) { - let hg_path = dirstate_node.full_path(); + ) -> Result<(), DirstateV2ParseError> { + let hg_path = dirstate_node.full_path()?; let file_type = fs_entry.metadata.file_type(); let file_or_symlink = file_type.is_file() || file_type.is_symlink(); if !file_or_symlink { @@ -159,8 +167,8 @@ // `hg rm` or similar) or deleted before it could be // replaced by a directory or something else. self.mark_removed_or_deleted_if_file( - dirstate_node.full_path(), - dirstate_node.state(), + hg_path, + dirstate_node.state()?, ); } if file_type.is_dir() { @@ -171,15 +179,15 @@ let is_at_repo_root = false; self.traverse_fs_directory_and_dirstate( is_ignored, - dirstate_node.children(), + dirstate_node.children()?, hg_path, &fs_entry.full_path, is_at_repo_root, - ); + )? } else { if file_or_symlink && self.matcher.matches(hg_path) { let full_path = Cow::from(hg_path); - if let Some(state) = dirstate_node.state() { + if let Some(state) = dirstate_node.state()? { match state { EntryState::Added => { self.outcome.lock().unwrap().added.push(full_path) @@ -197,7 +205,7 @@ .modified .push(full_path), EntryState::Normal => { - self.handle_normal_file(&dirstate_node, fs_entry); + self.handle_normal_file(&dirstate_node, fs_entry)? } // This variant is not used in DirstateMap // nodes @@ -213,10 +221,11 @@ } } - for child_node in dirstate_node.children().iter() { - self.traverse_dirstate_only(child_node) + for child_node in dirstate_node.children()?.iter() { + self.traverse_dirstate_only(child_node)? } } + Ok(()) } /// A file with `EntryState::Normal` in the dirstate was found in the @@ -225,7 +234,7 @@ &self, dirstate_node: &NodeRef<'tree, '_>, fs_entry: &DirEntry, - ) { + ) -> Result<(), DirstateV2ParseError> { // Keep the low 31 bits fn truncate_u64(value: u64) -> i32 { (value & 0x7FFF_FFFF) as i32 @@ -235,9 +244,9 @@ } let entry = dirstate_node - .entry() + .entry()? .expect("handle_normal_file called with entry-less node"); - let full_path = Cow::from(dirstate_node.full_path()); + let full_path = Cow::from(dirstate_node.full_path()?); let mode_changed = || { self.options.check_exec && entry.mode_changed(&fs_entry.metadata) }; @@ -249,7 +258,7 @@ // issue6456: Size returned may be longer due to encryption // on EXT-4 fscrypt. TODO maybe only do it on EXT4? self.outcome.lock().unwrap().unsure.push(full_path) - } else if dirstate_node.copy_source().is_some() + } else if dirstate_node.has_copy_source() || entry.is_from_other_parent() || (entry.size >= 0 && (size_changed || mode_changed())) { @@ -264,18 +273,23 @@ self.outcome.lock().unwrap().clean.push(full_path) } } + Ok(()) } /// A node in the dirstate tree has no corresponding filesystem entry - fn traverse_dirstate_only(&self, dirstate_node: NodeRef<'tree, '_>) { + fn traverse_dirstate_only( + &self, + dirstate_node: NodeRef<'tree, '_>, + ) -> Result<(), DirstateV2ParseError> { self.mark_removed_or_deleted_if_file( - dirstate_node.full_path(), - dirstate_node.state(), + dirstate_node.full_path()?, + dirstate_node.state()?, ); dirstate_node - .children() + .children()? .par_iter() - .for_each(|child_node| self.traverse_dirstate_only(child_node)) + .map(|child_node| self.traverse_dirstate_only(child_node)) + .collect() } /// A node in the dirstate tree has no corresponding *file* on the
--- a/rust/hg-core/src/lib.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/lib.rs Wed May 19 13:15:00 2021 +0200 @@ -83,6 +83,15 @@ Common(errors::HgError), } +impl fmt::Display for DirstateError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DirstateError::Map(error) => error.fmt(f), + DirstateError::Common(error) => error.fmt(f), + } + } +} + #[derive(Debug, derive_more::From)] pub enum PatternError { #[from]
--- a/rust/hg-cpython/src/dirstate/copymap.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/copymap.rs Wed May 19 13:15:00 2021 +0200 @@ -13,7 +13,9 @@ }; use std::cell::RefCell; +use crate::dirstate::dirstate_map::v2_error; use crate::dirstate::dirstate_map::DirstateMap; +use hg::dirstate_tree::on_disk::DirstateV2ParseError; use hg::utils::hg_path::HgPath; use hg::CopyMapIter; @@ -88,15 +90,16 @@ } fn translate_key( py: Python, - res: (&HgPath, &HgPath), + res: Result<(&HgPath, &HgPath), DirstateV2ParseError>, ) -> PyResult<Option<PyBytes>> { - Ok(Some(PyBytes::new(py, res.0.as_bytes()))) + let (k, _v) = res.map_err(|e| v2_error(py, e))?; + Ok(Some(PyBytes::new(py, k.as_bytes()))) } fn translate_key_value( py: Python, - res: (&HgPath, &HgPath), + res: Result<(&HgPath, &HgPath), DirstateV2ParseError>, ) -> PyResult<Option<(PyBytes, PyBytes)>> { - let (k, v) = res; + let (k, v) = res.map_err(|e| v2_error(py, e))?; Ok(Some(( PyBytes::new(py, k.as_bytes()), PyBytes::new(py, v.as_bytes()),
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs Wed May 19 13:15:00 2021 +0200 @@ -20,7 +20,8 @@ use hg::{ errors::HgError, utils::hg_path::{HgPath, HgPathBuf}, - DirsMultiset, DirsMultisetIter, DirstateMapError, EntryState, + DirsMultiset, DirsMultisetIter, DirstateError, DirstateMapError, + EntryState, }; py_class!(pub class Dirs |py| { @@ -45,9 +46,9 @@ } let inner = if let Ok(map) = map.cast_as::<PyDict>(py) { let dirstate = extract_dirstate(py, &map)?; - let dirstate = dirstate.iter().map(|(k, v)| (k, *v)); + let dirstate = dirstate.iter().map(|(k, v)| Ok((k, *v))); DirsMultiset::from_dirstate(dirstate, skip_state) - .map_err(|e: DirstateMapError| { + .map_err(|e: DirstateError| { PyErr::new::<exc::ValueError, _>(py, e.to_string()) })? } else {
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -29,13 +29,13 @@ use hg::{ dirstate::parsers::Timestamp, dirstate_tree::dispatch::DirstateMapMethods, + dirstate_tree::on_disk::DirstateV2ParseError, errors::HgError, revlog::Node, utils::files::normalize_case, utils::hg_path::{HgPath, HgPathBuf}, DirsMultiset, DirstateEntry, DirstateError, - DirstateMap as RustDirstateMap, DirstateMapError, DirstateParents, - EntryState, StateMapIter, + DirstateMap as RustDirstateMap, DirstateParents, EntryState, StateMapIter, }; // TODO @@ -90,7 +90,12 @@ default: Option<PyObject> = None ) -> PyResult<Option<PyObject>> { let key = key.extract::<PyBytes>(py)?; - match self.inner(py).borrow().get(HgPath::new(key.data(py))) { + match self + .inner(py) + .borrow() + .get(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e))? + { Some(entry) => { Ok(Some(make_dirstate_tuple(py, &entry)?)) }, @@ -124,7 +129,7 @@ size: size.extract(py)?, mtime: mtime.extract(py)?, }, - ).and(Ok(py.None())).or_else(|e: DirstateMapError| { + ).and(Ok(py.None())).or_else(|e: DirstateError| { Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())) }) } @@ -190,8 +195,10 @@ )) }) .collect(); - self.inner(py).borrow_mut() - .clear_ambiguous_times(files?, now.extract(py)?); + self.inner(py) + .borrow_mut() + .clear_ambiguous_times(files?, now.extract(py)?) + .map_err(|e| v2_error(py, e))?; Ok(py.None()) } @@ -199,6 +206,7 @@ let mut inner_shared = self.inner(py).borrow_mut(); let set = PySet::empty(py)?; for path in inner_shared.iter_other_parent_paths() { + let path = path.map_err(|e| v2_error(py, e))?; set.add(py, PyBytes::new(py, path.as_bytes()))?; } Ok(set.into_object()) @@ -210,28 +218,20 @@ def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> { let key = key.extract::<PyBytes>(py)?; - Ok(self - .inner(py) + self.inner(py) .borrow_mut() - .non_normal_entries_contains(HgPath::new(key.data(py)))) + .non_normal_entries_contains(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e)) } def non_normal_entries_display(&self) -> PyResult<PyString> { - Ok( - PyString::new( - py, - &format!( - "NonNormalEntries: {}", - hg::utils::join_display( - self - .inner(py) - .borrow_mut() - .iter_non_normal_paths(), - ", " - ) - ) - ) - ) + let mut inner = self.inner(py).borrow_mut(); + let paths = inner + .iter_non_normal_paths() + .collect::<Result<Vec<_>, _>>() + .map_err(|e| v2_error(py, e))?; + let formatted = format!("NonNormalEntries: {}", hg::utils::join_display(paths, ", ")); + Ok(PyString::new(py, &formatted)) } def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> { @@ -248,6 +248,7 @@ let ret = PyList::new(py, &[]); for filename in inner.non_normal_or_other_parent_paths() { + let filename = filename.map_err(|e| v2_error(py, e))?; let as_pystring = PyBytes::new(py, filename.as_bytes()); ret.append(py, as_pystring.into_object()); } @@ -320,7 +321,8 @@ def filefoldmapasdict(&self) -> PyResult<PyDict> { let dict = PyDict::new(py); - for (path, entry) in self.inner(py).borrow_mut().iter() { + for item in self.inner(py).borrow_mut().iter() { + let (path, entry) = item.map_err(|e| v2_error(py, e))?; if entry.state != EntryState::Removed { let key = normalize_case(path); let value = path; @@ -340,13 +342,21 @@ def __contains__(&self, key: PyObject) -> PyResult<bool> { let key = key.extract::<PyBytes>(py)?; - Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py)))) + self.inner(py) + .borrow() + .contains_key(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e)) } def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { let key = key.extract::<PyBytes>(py)?; let key = HgPath::new(key.data(py)); - match self.inner(py).borrow().get(key) { + match self + .inner(py) + .borrow() + .get(key) + .map_err(|e| v2_error(py, e))? + { Some(entry) => { Ok(make_dirstate_tuple(py, &entry)?) }, @@ -418,7 +428,8 @@ // TODO all copymap* methods, see docstring above def copymapcopy(&self) -> PyResult<PyDict> { let dict = PyDict::new(py); - for (key, value) in self.inner(py).borrow().copy_map_iter() { + for item in self.inner(py).borrow().copy_map_iter() { + let (key, value) = item.map_err(|e| v2_error(py, e))?; dict.set_item( py, PyBytes::new(py, key.as_bytes()), @@ -430,7 +441,12 @@ def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> { let key = key.extract::<PyBytes>(py)?; - match self.inner(py).borrow().copy_map_get(HgPath::new(key.data(py))) { + match self + .inner(py) + .borrow() + .copy_map_get(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e))? + { Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())), None => Err(PyErr::new::<exc::KeyError, _>( py, @@ -447,10 +463,10 @@ } def copymapcontains(&self, key: PyObject) -> PyResult<bool> { let key = key.extract::<PyBytes>(py)?; - Ok(self - .inner(py) + self.inner(py) .borrow() - .copy_map_contains_key(HgPath::new(key.data(py)))) + .copy_map_contains_key(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e)) } def copymapget( &self, @@ -462,6 +478,7 @@ .inner(py) .borrow() .copy_map_get(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e))? { Some(copy) => Ok(Some( PyBytes::new(py, copy.as_bytes()).into_object(), @@ -476,10 +493,13 @@ ) -> PyResult<PyObject> { let key = key.extract::<PyBytes>(py)?; let value = value.extract::<PyBytes>(py)?; - self.inner(py).borrow_mut().copy_map_insert( - HgPathBuf::from_bytes(key.data(py)), - HgPathBuf::from_bytes(value.data(py)), - ); + self.inner(py) + .borrow_mut() + .copy_map_insert( + HgPathBuf::from_bytes(key.data(py)), + HgPathBuf::from_bytes(value.data(py)), + ) + .map_err(|e| v2_error(py, e))?; Ok(py.None()) } def copymappop( @@ -492,6 +512,7 @@ .inner(py) .borrow_mut() .copy_map_remove(HgPath::new(key.data(py))) + .map_err(|e| v2_error(py, e))? { Some(_) => Ok(None), None => Ok(default), @@ -525,15 +546,16 @@ } fn translate_key( py: Python, - res: (&HgPath, DirstateEntry), + res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>, ) -> PyResult<Option<PyBytes>> { - Ok(Some(PyBytes::new(py, res.0.as_bytes()))) + let (f, _entry) = res.map_err(|e| v2_error(py, e))?; + Ok(Some(PyBytes::new(py, f.as_bytes()))) } fn translate_key_value( py: Python, - res: (&HgPath, DirstateEntry), + res: Result<(&HgPath, DirstateEntry), DirstateV2ParseError>, ) -> PyResult<Option<(PyBytes, PyObject)>> { - let (f, entry) = res; + let (f, entry) = res.map_err(|e| v2_error(py, e))?; Ok(Some(( PyBytes::new(py, f.as_bytes()), make_dirstate_tuple(py, &entry)?, @@ -562,3 +584,7 @@ Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())), } } + +pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr { + PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2") +}
--- a/rust/hg-cpython/src/dirstate/dispatch.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dispatch.rs Wed May 19 13:15:00 2021 +0200 @@ -1,12 +1,12 @@ use crate::dirstate::owning::OwningDirstateMap; use hg::dirstate::parsers::Timestamp; use hg::dirstate_tree::dispatch::DirstateMapMethods; +use hg::dirstate_tree::on_disk::DirstateV2ParseError; use hg::matchers::Matcher; use hg::utils::hg_path::{HgPath, HgPathBuf}; use hg::CopyMapIter; use hg::DirstateEntry; use hg::DirstateError; -use hg::DirstateMapError; use hg::DirstateParents; use hg::DirstateStatus; use hg::EntryState; @@ -26,7 +26,7 @@ filename: &HgPath, old_state: EntryState, entry: DirstateEntry, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { self.get_mut().add_file(filename, old_state, entry) } @@ -35,7 +35,7 @@ filename: &HgPath, old_state: EntryState, size: i32, - ) -> Result<(), DirstateMapError> { + ) -> Result<(), DirstateError> { self.get_mut().remove_file(filename, old_state, size) } @@ -43,15 +43,22 @@ &mut self, filename: &HgPath, old_state: EntryState, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { self.get_mut().drop_file(filename, old_state) } - fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { + fn clear_ambiguous_times( + &mut self, + filenames: Vec<HgPathBuf>, + now: i32, + ) -> Result<(), DirstateV2ParseError> { self.get_mut().clear_ambiguous_times(filenames, now) } - fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool { + fn non_normal_entries_contains( + &mut self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { self.get_mut().non_normal_entries_contains(key) } @@ -61,7 +68,8 @@ fn non_normal_or_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + '_> { + ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_> + { self.get_mut().non_normal_or_other_parent_paths() } @@ -71,33 +79,36 @@ fn iter_non_normal_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { self.get_mut().iter_non_normal_paths() } fn iter_non_normal_paths_panic( &self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { self.get().iter_non_normal_paths_panic() } fn iter_other_parent_paths( &mut self, - ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { + ) -> Box< + dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_, + > { self.get_mut().iter_other_parent_paths() } fn has_tracked_dir( &mut self, directory: &HgPath, - ) -> Result<bool, DirstateMapError> { + ) -> Result<bool, DirstateError> { self.get_mut().has_tracked_dir(directory) } - fn has_dir( - &mut self, - directory: &HgPath, - ) -> Result<bool, DirstateMapError> { + fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> { self.get_mut().has_dir(directory) } @@ -117,11 +128,11 @@ self.get_mut().pack_v2(parents, now) } - fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + fn set_all_dirs(&mut self) -> Result<(), DirstateError> { self.get_mut().set_all_dirs() } - fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + fn set_dirs(&mut self) -> Result<(), DirstateError> { self.get_mut().set_dirs() } @@ -145,15 +156,24 @@ self.get().copy_map_iter() } - fn copy_map_contains_key(&self, key: &HgPath) -> bool { + fn copy_map_contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { self.get().copy_map_contains_key(key) } - fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> { + fn copy_map_get( + &self, + key: &HgPath, + ) -> Result<Option<&HgPath>, DirstateV2ParseError> { self.get().copy_map_get(key) } - fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> { + fn copy_map_remove( + &mut self, + key: &HgPath, + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { self.get_mut().copy_map_remove(key) } @@ -161,7 +181,7 @@ &mut self, key: HgPathBuf, value: HgPathBuf, - ) -> Option<HgPathBuf> { + ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { self.get_mut().copy_map_insert(key, value) } @@ -169,11 +189,17 @@ self.get().len() } - fn contains_key(&self, key: &HgPath) -> bool { + fn contains_key( + &self, + key: &HgPath, + ) -> Result<bool, DirstateV2ParseError> { self.get().contains_key(key) } - fn get(&self, key: &HgPath) -> Option<DirstateEntry> { + fn get( + &self, + key: &HgPath, + ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { self.get().get(key) }
--- a/rust/hg-cpython/src/dirstate/non_normal_entries.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/non_normal_entries.rs Wed May 19 13:15:00 2021 +0200 @@ -11,7 +11,9 @@ UnsafePyLeaked, }; +use crate::dirstate::dirstate_map::v2_error; use crate::dirstate::DirstateMap; +use hg::dirstate_tree::on_disk::DirstateV2ParseError; use hg::utils::hg_path::HgPath; use std::cell::RefCell; @@ -54,13 +56,18 @@ Ok(true) } - fn translate_key(py: Python, key: &HgPath) -> PyResult<Option<PyBytes>> { + fn translate_key( + py: Python, + key: Result<&HgPath, DirstateV2ParseError>, + ) -> PyResult<Option<PyBytes>> { + let key = key.map_err(|e| v2_error(py, e))?; Ok(Some(PyBytes::new(py, key.as_bytes()))) } } -type NonNormalEntriesIter<'a> = - Box<dyn Iterator<Item = &'a HgPath> + Send + 'a>; +type NonNormalEntriesIter<'a> = Box< + dyn Iterator<Item = Result<&'a HgPath, DirstateV2ParseError>> + Send + 'a, +>; py_shared_iterator!( NonNormalEntriesIterator,