Mercurial > hg
changeset 47337:0654b3b3d2b5
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes
TODO: more description
Differential Revision: https://phab.mercurial-scm.org/D10751
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 13:15:00 +0200 |
parents | 8d0260d0dbc9 |
children | f27f2afb15da |
files | rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs |
diffstat | 2 files changed, 170 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -48,14 +48,17 @@ pub(super) enum ChildNodes<'on_disk> { InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>), + OnDisk(&'on_disk [on_disk::Node]), } pub(super) enum ChildNodesRef<'tree, 'on_disk> { InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>), + OnDisk(&'on_disk [on_disk::Node]), } pub(super) enum NodeRef<'tree, 'on_disk> { InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>), + OnDisk(&'on_disk on_disk::Node), } impl Default for ChildNodes<'_> { @@ -70,24 +73,42 @@ ) -> ChildNodesRef<'tree, 'on_disk> { match self { ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes), + ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes), } } pub(super) fn is_empty(&self) -> bool { match self { ChildNodes::InMemory(nodes) => nodes.is_empty(), + ChildNodes::OnDisk(nodes) => nodes.is_empty(), } } pub(super) fn make_mut( &mut self, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result< &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>, DirstateV2ParseError, > { match self { ChildNodes::InMemory(nodes) => Ok(nodes), + ChildNodes::OnDisk(nodes) => { + let nodes = nodes + .iter() + .map(|node| { + Ok(( + node.path(on_disk)?, + node.to_in_memory_node(on_disk)?, + )) + }) + .collect::<Result<_, _>>()?; + *self = ChildNodes::InMemory(nodes); + match self { + ChildNodes::InMemory(nodes) => Ok(nodes), + ChildNodes::OnDisk(_) => unreachable!(), + } + } } } } @@ -96,12 +117,29 @@ pub(super) fn get( &self, base_name: &HgPath, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> { match self { ChildNodesRef::InMemory(nodes) => Ok(nodes .get_key_value(base_name) .map(|(k, v)| NodeRef::InMemory(k, v))), + ChildNodesRef::OnDisk(nodes) => { + let mut parse_result = Ok(()); + let search_result = nodes.binary_search_by(|node| { + match node.base_name(on_disk) { + Ok(node_base_name) => node_base_name.cmp(base_name), + Err(e) => { + parse_result = Err(e); + // Dummy comparison result, `search_result` won’t + // be used since `parse_result` is an error + std::cmp::Ordering::Equal + } + } + }); + parse_result.map(|()| { + search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i])) + }) + } } } @@ -110,8 +148,11 @@ &self, ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> { match self { - ChildNodesRef::InMemory(nodes) => { - nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)) + ChildNodesRef::InMemory(nodes) => itertools::Either::Left( + nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)), + ), + ChildNodesRef::OnDisk(nodes) => { + itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk)) } } } @@ -123,9 +164,12 @@ { use rayon::prelude::*; match self { - ChildNodesRef::InMemory(nodes) => { - nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)) - } + ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left( + nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)), + ), + ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right( + nodes.par_iter().map(NodeRef::OnDisk), + ), } } @@ -139,6 +183,7 @@ fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath { match node { NodeRef::InMemory(path, _node) => path.base_name(), + NodeRef::OnDisk(_) => unreachable!(), } } // `sort_unstable_by_key` doesn’t allow keys borrowing from the @@ -146,6 +191,10 @@ vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b))); vec } + ChildNodesRef::OnDisk(nodes) => { + // Nodes on disk are already sorted + nodes.iter().map(NodeRef::OnDisk).collect() + } } } } @@ -153,61 +202,72 @@ impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> { pub(super) fn full_path( &self, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result<&'tree HgPath, DirstateV2ParseError> { match self { NodeRef::InMemory(path, _node) => Ok(path.full_path()), + NodeRef::OnDisk(node) => node.full_path(on_disk), } } /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree pub(super) fn full_path_cow( &self, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> { match self { NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()), + NodeRef::OnDisk(node) => { + Ok(Cow::Borrowed(node.full_path(on_disk)?)) + } } } pub(super) fn base_name( &self, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result<&'tree HgPath, DirstateV2ParseError> { match self { NodeRef::InMemory(path, _node) => Ok(path.base_name()), + NodeRef::OnDisk(node) => node.base_name(on_disk), } } pub(super) fn children( &self, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> { match self { NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()), + NodeRef::OnDisk(node) => { + Ok(ChildNodesRef::OnDisk(node.children(on_disk)?)) + } } } pub(super) fn has_copy_source(&self) -> bool { match self { NodeRef::InMemory(_path, node) => node.copy_source.is_some(), + NodeRef::OnDisk(node) => node.has_copy_source(), } } pub(super) fn copy_source( &self, - _on_disk: &'on_disk [u8], + on_disk: &'on_disk [u8], ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> { match self { NodeRef::InMemory(_path, node) => { Ok(node.copy_source.as_ref().map(|s| &**s)) } + NodeRef::OnDisk(node) => node.copy_source(on_disk), } } pub(super) fn has_entry(&self) -> bool { match self { NodeRef::InMemory(_path, node) => node.entry.is_some(), + NodeRef::OnDisk(node) => node.has_entry(), } } @@ -216,6 +276,7 @@ ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { match self { NodeRef::InMemory(_path, node) => Ok(node.entry), + NodeRef::OnDisk(node) => node.entry(), } } @@ -226,12 +287,14 @@ NodeRef::InMemory(_path, node) => { Ok(node.entry.as_ref().map(|entry| entry.state)) } + NodeRef::OnDisk(node) => node.state(), } } pub(super) fn tracked_descendants_count(&self) -> u32 { match self { NodeRef::InMemory(_path, node) => node.tracked_descendants_count, + NodeRef::OnDisk(node) => node.tracked_descendants_count.get(), } } }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200 @@ -16,6 +16,7 @@ use crate::DirstateEntry; use crate::DirstateError; use crate::DirstateParents; +use crate::EntryState; use bytes_cast::unaligned::{I32Be, U32Be, U64Be}; use bytes_cast::BytesCast; use std::borrow::Cow; @@ -42,7 +43,7 @@ #[derive(BytesCast)] #[repr(C)] -struct Node { +pub(super) struct Node { full_path: PathSlice, /// In bytes from `self.full_path.start` @@ -51,12 +52,12 @@ copy_source: OptPathSlice, entry: OptEntry, children: ChildNodes, - tracked_descendants_count: Size, + pub(super) tracked_descendants_count: Size, } /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1 /// format -#[derive(BytesCast)] +#[derive(BytesCast, Copy, Clone)] #[repr(C)] struct OptEntry { state: u8, @@ -149,7 +150,9 @@ } let dirstate_map = DirstateMap { on_disk, - root: read_nodes(on_disk, *root)?, + root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>( + on_disk, *root, + )?), nodes_with_entry_count: nodes_with_entry_count.get(), nodes_with_copy_source_count: nodes_with_copy_source_count.get(), }; @@ -158,49 +161,96 @@ } impl Node { - pub(super) fn path<'on_disk>( + pub(super) fn full_path<'on_disk>( &self, on_disk: &'on_disk [u8], - ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> { - let full_path = read_hg_path(on_disk, self.full_path)?; - let base_name_start = usize::try_from(self.base_name_start.get()) - // u32 -> usize, could only panic on a 16-bit CPU - .expect("dirstate-v2 base_name_start out of bounds"); - if base_name_start < full_path.len() { - Ok(WithBasename::from_raw_parts(full_path, base_name_start)) + ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { + read_hg_path(on_disk, self.full_path) + } + + pub(super) fn base_name_start<'on_disk>( + &self, + ) -> Result<usize, DirstateV2ParseError> { + let start = self.base_name_start.get(); + if start < self.full_path.len.get() { + let start = usize::try_from(start) + // u32 -> usize, could only panic on a 16-bit CPU + .expect("dirstate-v2 base_name_start out of bounds"); + Ok(start) } else { Err(DirstateV2ParseError) } } + pub(super) fn base_name<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { + let full_path = self.full_path(on_disk)?; + let base_name_start = self.base_name_start()?; + Ok(HgPath::new(&full_path.as_bytes()[base_name_start..])) + } + + pub(super) fn path<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> { + Ok(WithBasename::from_raw_parts( + Cow::Borrowed(self.full_path(on_disk)?), + self.base_name_start()?, + )) + } + + pub(super) fn has_copy_source<'on_disk>(&self) -> bool { + self.copy_source.start.get() != 0 + } + pub(super) fn copy_source<'on_disk>( &self, on_disk: &'on_disk [u8], - ) -> Result<Option<Cow<'on_disk, HgPath>>, DirstateV2ParseError> { - Ok(if self.copy_source.start.get() != 0 { + ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> { + Ok(if self.has_copy_source() { Some(read_hg_path(on_disk, self.copy_source)?) } else { None }) } + pub(super) fn has_entry(&self) -> bool { + self.entry.state != b'\0' + } + + pub(super) fn state( + &self, + ) -> Result<Option<EntryState>, DirstateV2ParseError> { + Ok(if self.has_entry() { + Some( + self.entry + .state + .try_into() + .map_err(|_| DirstateV2ParseError)?, + ) + } else { + None + }) + } + pub(super) fn entry( &self, ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { - Ok(if self.entry.state != b'\0' { - Some(DirstateEntry { - state: self - .entry - .state - .try_into() - .map_err(|_| DirstateV2ParseError)?, - mode: self.entry.mode.get(), - mtime: self.entry.mtime.get(), - size: self.entry.size.get(), - }) - } else { - None - }) + Ok(self.state()?.map(|state| DirstateEntry { + state, + mode: self.entry.mode.get(), + mtime: self.entry.mtime.get(), + size: self.entry.size.get(), + })) + } + + pub(super) fn children<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'on_disk [Node], DirstateV2ParseError> { + read_slice::<Node>(on_disk, self.children) } pub(super) fn to_in_memory_node<'on_disk>( @@ -208,33 +258,22 @@ on_disk: &'on_disk [u8], ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> { Ok(dirstate_map::Node { - children: read_nodes(on_disk, self.children)?, - copy_source: self.copy_source(on_disk)?, + children: dirstate_map::ChildNodes::OnDisk( + self.children(on_disk)?, + ), + copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), entry: self.entry()?, tracked_descendants_count: self.tracked_descendants_count.get(), }) } } -fn read_nodes( - on_disk: &[u8], - slice: ChildNodes, -) -> Result<dirstate_map::ChildNodes, DirstateV2ParseError> { - read_slice::<Node>(on_disk, slice)? - .iter() - .map(|node| { - Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?)) - }) - .collect::<Result<_, _>>() - .map(dirstate_map::ChildNodes::InMemory) -} - fn read_hg_path( on_disk: &[u8], slice: Slice, -) -> Result<Cow<HgPath>, DirstateV2ParseError> { +) -> Result<&HgPath, DirstateV2ParseError> { let bytes = read_slice::<u8>(on_disk, slice)?; - Ok(Cow::Borrowed(HgPath::new(bytes))) + Ok(HgPath::new(bytes)) } fn read_slice<T>( @@ -300,8 +339,11 @@ // First accumulate serialized nodes in a `Vec` let mut on_disk_nodes = Vec::with_capacity(nodes.len()); for node in nodes { - let children = node.children(dirstate_map.on_disk)?; - let children = write_nodes(dirstate_map, children, out)?; + let children = write_nodes( + dirstate_map, + node.children(dirstate_map.on_disk)?, + out, + )?; let full_path = node.full_path(dirstate_map.on_disk)?; let full_path = write_slice::<u8>(full_path.as_bytes(), out); let copy_source = @@ -341,6 +383,12 @@ } }, }, + NodeRef::OnDisk(node) => Node { + children, + copy_source, + full_path, + ..*node + }, }) } // … so we can write them contiguously