Mercurial > hg
changeset 47676:096ee2e260a3
dirstate-v2: Rename Header to Root, move it to the end of the data file
Now that they don’t have to be at the start, a given data file may contain
multiple "roots". A docket only points to one of them, and previous ones
are left unused to allow append-only in-place writing to an existing
data file.
Differential Revision: https://phab.mercurial-scm.org/D11090
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Mon, 12 Jul 2021 23:05:56 +0200 |
parents | 48aec076b8fb |
children | da1c0cd68d53 |
files | mercurial/debugcommands.py rust/hg-core/src/dirstate_tree/on_disk.rs |
diffstat | 2 files changed, 47 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/debugcommands.py Mon Jul 12 22:46:52 2021 +0200 +++ b/mercurial/debugcommands.py Mon Jul 12 23:05:56 2021 +0200 @@ -997,11 +997,13 @@ or nothing for dirstate-v2 """ if repo.dirstate._use_dirstate_v2: - hash_offset = 16 # Four 32-bit integers before this field + docket = repo.dirstate._map.docket hash_len = 20 # 160 bits for SHA-1 - data_filename = repo.dirstate._map.docket.data_filename() + hash_offset = docket.data_size - hash_len # hash is at the end + data_filename = docket.data_filename() with repo.vfs(data_filename) as f: - hash_bytes = f.read(hash_offset + hash_len)[-hash_len:] + f.seek(hash_offset) + hash_bytes = f.read(hash_len) ui.write(binascii.hexlify(hash_bytes) + b'\n')
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Jul 12 22:46:52 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Jul 12 23:05:56 2021 +0200 @@ -2,8 +2,17 @@ //! //! # File format //! -//! The file starts with a fixed-sized header, whose layout is defined by the -//! `Header` struct. Its `root` field contains the slice (offset and length) to +//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts +//! with a fixed-sized header whose layout is defined by the `DocketHeader` +//! struct, followed by the data file identifier. +//! +//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That +//! file may be longer than the size given in the docket, but not shorter. Only +//! the start of the data file up to the given size is considered. The +//! fixed-size "root" of the dirstate tree whose layout is defined by the +//! `Root` struct is found at the end of that slice of data. +//! +//! Its `root_nodes` field contains the slice (offset and length) to //! the nodes representing the files and directories at the root of the //! repository. Each node is also fixed-size, defined by the `Node` struct. //! Nodes in turn contain slices to variable-size paths, and to their own child @@ -56,8 +65,8 @@ #[derive(BytesCast)] #[repr(C)] -struct Header { - root: ChildNodes, +struct Root { + root_nodes: ChildNodes, nodes_with_entry_count: Size, nodes_with_copy_source_count: Size, @@ -119,7 +128,7 @@ /// - All direct children of this directory (as returned by /// `std::fs::read_dir`) either have a corresponding dirstate node, or /// are ignored by ignore patterns whose hash is in - /// `Header::ignore_patterns_hash`. + /// `Root::ignore_patterns_hash`. 
/// /// This means that if `std::fs::symlink_metadata` later reports the /// same modification time and ignored patterns haven’t changed, a run @@ -190,7 +199,7 @@ /// Make sure that size-affecting changes are made knowingly fn _static_assert_size_of() { let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; - let _ = std::mem::transmute::<Header, [u8; 36]>; + let _ = std::mem::transmute::<Root, [u8; 36]>; let _ = std::mem::transmute::<Node, [u8; 49]>; } @@ -247,25 +256,36 @@ } } +fn read_root<'on_disk>( + on_disk: &'on_disk [u8], +) -> Result<&'on_disk Root, DirstateV2ParseError> { + // Find the `Root` at the end of the given slice + let root_offset = on_disk + .len() + .checked_sub(std::mem::size_of::<Root>()) + // A non-empty slice too short is an error + .ok_or(DirstateV2ParseError)?; + let (root, _) = Root::from_bytes(&on_disk[root_offset..]) + .map_err(|_| DirstateV2ParseError)?; + Ok(root) +} + pub(super) fn read<'on_disk>( on_disk: &'on_disk [u8], ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { if on_disk.is_empty() { return Ok(DirstateMap::empty(on_disk)); } - let (header, _) = - Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; + let root = read_root(on_disk)?; let dirstate_map = DirstateMap { on_disk, root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>( on_disk, - header.root, + root.root_nodes, )?), - nodes_with_entry_count: header.nodes_with_entry_count.get(), - nodes_with_copy_source_count: header - .nodes_with_copy_source_count - .get(), - ignore_patterns_hash: header.ignore_patterns_hash, + nodes_with_entry_count: root.nodes_with_entry_count.get(), + nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), + ignore_patterns_hash: root.ignore_patterns_hash, }; Ok(dirstate_map) } @@ -491,8 +511,7 @@ on_disk: &'on_disk [u8], mut f: impl FnMut(&'on_disk HgPath), ) -> Result<(), DirstateV2ParseError> { - let (header, _) = - Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; + let root = 
read_root(on_disk)?; fn recur<'on_disk>( on_disk: &'on_disk [u8], nodes: Slice, @@ -508,37 +527,33 @@ } Ok(()) } - recur(on_disk, header.root, &mut f) + recur(on_disk, root.root_nodes, &mut f) } pub(super) fn write( dirstate_map: &mut DirstateMap, ) -> Result<Vec<u8>, DirstateError> { - let header_len = std::mem::size_of::<Header>(); + let root_len = std::mem::size_of::<Root>(); // This ignores the space for paths, and for nodes without an entry. // TODO: better estimate? Skip the `Vec` and write to a file directly? - let size_guess = header_len + let size_guess = root_len + std::mem::size_of::<Node>() * dirstate_map.nodes_with_entry_count as usize; let mut out = Vec::with_capacity(size_guess); - // Keep space for the header. We’ll fill it out at the end when we know the - // actual offset for the root nodes. - out.resize(header_len, 0_u8); - - let root = + let root_nodes = write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?; - let header = Header { - root, + let root = Root { + root_nodes, nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), nodes_with_copy_source_count: dirstate_map .nodes_with_copy_source_count .into(), ignore_patterns_hash: dirstate_map.ignore_patterns_hash, }; - out[..header_len].copy_from_slice(header.as_bytes()); + out.extend(root.as_bytes()); Ok(out) }