Mercurial > hg
changeset 47679:731286dc5321
dirstate-v2: Reuse existing nodes when appending to a data file
When writing a dirstate in v2 format by appending to an existing data file,
nodes that are still "unparsed" from the previous on-disk representation
have been unchanged and can therefore be reused.
This makes the new data point to previously-existing data for tree nodes.
Differential Revision: https://phab.mercurial-scm.org/D11095
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Thu, 15 Jul 2021 08:53:03 +0200 |
parents | 065e61628980 |
children | a8b0f29dc0d7 |
files | rust/hg-core/src/dirstate_tree/on_disk.rs |
diffstat | 1 files changed, 57 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Jul 13 17:18:23 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Jul 15 08:53:03 2021 +0200 @@ -590,8 +590,18 @@ &mut self, nodes: dirstate_map::ChildNodesRef, ) -> Result<ChildNodes, DirstateError> { - // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration - // order. Sort to enable binary search in the written file. + // Reuse already-written nodes if possible + if self.append { + if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes { + let start = self.offset_of(nodes_slice); + let len = child_nodes_len_from_usize(nodes_slice.len()); + return Ok(ChildNodes { start, len }); + } + } + + // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has + // undefined iteration order. Sort to enable binary search in the + // written file. let nodes = nodes.sorted(); let nodes_len = nodes.len(); @@ -664,32 +674,65 @@ // … so we can write them contiguously, after writing everything else // they refer to. let start = self.current_offset(); - let len = u32::try_from(nodes_len) - // Could only panic with over 4 billion nodes - .expect("dirstate-v2 path length overflow") - .into(); + let len = child_nodes_len_from_usize(nodes_len); self.out.extend(on_disk_nodes.as_bytes()); Ok(ChildNodes { start, len }) } + /// Takes a slice of items within `on_disk` and returns its offset for the + /// start of `on_disk`. + /// + /// Panics if the given slice is not within `on_disk`. + fn offset_of<T>(&self, slice: &[T]) -> Offset + where + T: BytesCast, + { + fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> { + let start = slice.as_ptr() as usize; + let end = start + slice.len(); + start..=end + } + let slice_addresses = address_range(slice.as_bytes()); + let on_disk_addresses = address_range(self.dirstate_map.on_disk); + assert!(on_disk_addresses.contains(slice_addresses.start())); + assert!(on_disk_addresses.contains(slice_addresses.end())); + let offset = slice_addresses.start() - on_disk_addresses.start(); + offset_from_usize(offset) + } + fn current_offset(&mut self) -> Offset { let mut offset = self.out.len(); if self.append { offset += self.dirstate_map.on_disk.len() } - u32::try_from(offset) - // Could only panic for a dirstate file larger than 4 GiB - .expect("dirstate-v2 offset overflow") - .into() + offset_from_usize(offset) } fn write_path(&mut self, slice: &[u8]) -> PathSlice { let start = self.current_offset(); - let len = u16::try_from(slice.len()) - // Could only panic for paths over 64 KiB - .expect("dirstate-v2 path length overflow") - .into(); + let len = path_len_from_usize(slice.len()); self.out.extend(slice.as_bytes()); PathSlice { start, len } } } + +fn offset_from_usize(x: usize) -> Offset { + u32::try_from(x) + // Could only panic for a dirstate file larger than 4 GiB + .expect("dirstate-v2 offset overflow") + .into() +} + +fn child_nodes_len_from_usize(x: usize) -> Size { + u32::try_from(x) + // Could only panic with over 4 billion nodes + .expect("dirstate-v2 slice length overflow") + .into() +} + +fn path_len_from_usize(x: usize) -> PathSize { + u16::try_from(x) + // Could only panic for paths over 64 KiB + .expect("dirstate-v2 path length overflow") + .into() +}