comparison rust/hg-core/src/dirstate_tree/on_disk.rs @ 47330:73f23e7610f8

dirstate-tree: Remove DirstateMap::iter_node_data_mut In an upcoming changeset we want DirstateMap to be able to work directly with nodes in their "on disk" representation, without always allocating corresponding in-memory data structures. Nodes would have two possible representations: one immutable "on disk" referring to the bytes buffer of the contents of the .hg/dirstate file, and one mutable with HashMap like the current data structure. These nodes would have copy-on-write semantics: when an immutable node would need to be mutated, instead we allocate a new mutable node for it and its ancestors. A mutable iterator of the entire tree would still be possible, but it would become much more expensive since we’d need to allocate mutable nodes for everything. Instead, remove this iterator. It was only used to clear ambiguous mtimes while serializing the `DirstateMap`. Instead, clearing and serialization are now two separate passes. Clearing first uses an immutable iterator to collect the paths of nodes that need to be cleared, then accesses only those nodes mutably. Differential Revision: https://phab.mercurial-scm.org/D10744
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 19 May 2021 13:15:00 +0200
parents 2a9ddc8094c7
children 0252600fd1cf
comparison
equal deleted inserted replaced
47329:717a94b423b9 47330:73f23e7610f8
7 //! the nodes representing the files and directories at the root of the 7 //! the nodes representing the files and directories at the root of the
8 //! repository. Each node is also fixed-size, defined by the `Node` struct. 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 //! Nodes in turn contain slices to variable-size paths, and to their own child 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 //! nodes (if any) for nested files and directories. 10 //! nodes (if any) for nested files and directories.
11 11
12 use crate::dirstate::parsers::clear_ambiguous_mtime;
13 use crate::dirstate::parsers::Timestamp;
14 use crate::dirstate_tree::dirstate_map::{self, DirstateMap}; 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
15 use crate::dirstate_tree::path_with_basename::WithBasename; 13 use crate::dirstate_tree::path_with_basename::WithBasename;
16 use crate::errors::HgError; 14 use crate::errors::HgError;
17 use crate::utils::hg_path::HgPath; 15 use crate::utils::hg_path::HgPath;
18 use crate::DirstateEntry; 16 use crate::DirstateEntry;
228 } 226 }
229 227
230 pub(super) fn write( 228 pub(super) fn write(
231 dirstate_map: &mut DirstateMap, 229 dirstate_map: &mut DirstateMap,
232 parents: DirstateParents, 230 parents: DirstateParents,
233 now: Timestamp,
234 ) -> Result<Vec<u8>, DirstateError> { 231 ) -> Result<Vec<u8>, DirstateError> {
235 // TODO: how do we want to handle this in 2038?
236 let now: i32 = now.0.try_into().expect("time overflow");
237
238 let header_len = std::mem::size_of::<Header>(); 232 let header_len = std::mem::size_of::<Header>();
239 233
240 // This ignores the space for paths, and for nodes without an entry. 234 // This ignores the space for paths, and for nodes without an entry.
241 // TODO: better estimate? Skip the `Vec` and write to a file directly? 235 // TODO: better estimate? Skip the `Vec` and write to a file directly?
242 let size_guess = header_len 236 let size_guess = header_len
246 240
247 // Keep space for the header. We’ll fill it out at the end when we know the 241 // Keep space for the header. We’ll fill it out at the end when we know the
248 // actual offset for the root nodes. 242 // actual offset for the root nodes.
249 out.resize(header_len, 0_u8); 243 out.resize(header_len, 0_u8);
250 244
251 let root = write_nodes(&mut dirstate_map.root, now, &mut out)?; 245 let root = write_nodes(&mut dirstate_map.root, &mut out)?;
252 246
253 let header = Header { 247 let header = Header {
254 marker: *V2_FORMAT_MARKER, 248 marker: *V2_FORMAT_MARKER,
255 parents: parents, 249 parents: parents,
256 root, 250 root,
261 }; 255 };
262 out[..header_len].copy_from_slice(header.as_bytes()); 256 out[..header_len].copy_from_slice(header.as_bytes());
263 Ok(out) 257 Ok(out)
264 } 258 }
265 259
266 /// Serialize the dirstate to the `v2` format after clearing ambigous `mtime`s.
267 fn write_nodes( 260 fn write_nodes(
268 nodes: &mut dirstate_map::ChildNodes, 261 nodes: &mut dirstate_map::ChildNodes,
269 now: i32,
270 out: &mut Vec<u8>, 262 out: &mut Vec<u8>,
271 ) -> Result<ChildNodes, DirstateError> { 263 ) -> Result<ChildNodes, DirstateError> {
272 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration 264 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
273 // order. Sort to enable binary search in the written file. 265 // order. Sort to enable binary search in the written file.
274 let nodes = dirstate_map::Node::sorted(nodes); 266 let nodes = dirstate_map::Node::sorted(nodes);
275 267
276 // First accumulate serialized nodes in a `Vec` 268 // First accumulate serialized nodes in a `Vec`
277 let mut on_disk_nodes = Vec::with_capacity(nodes.len()); 269 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
278 for (full_path, node) in nodes { 270 for (full_path, node) in nodes {
279 on_disk_nodes.push(Node { 271 on_disk_nodes.push(Node {
280 children: write_nodes(&mut node.children, now, out)?, 272 children: write_nodes(&mut node.children, out)?,
281 tracked_descendants_count: node.tracked_descendants_count.into(), 273 tracked_descendants_count: node.tracked_descendants_count.into(),
282 full_path: write_slice::<u8>( 274 full_path: write_slice::<u8>(
283 full_path.full_path().as_bytes(), 275 full_path.full_path().as_bytes(),
284 out, 276 out,
285 ), 277 ),
294 start: 0.into(), 286 start: 0.into(),
295 len: 0.into(), 287 len: 0.into(),
296 } 288 }
297 }, 289 },
298 entry: if let Some(entry) = &mut node.entry { 290 entry: if let Some(entry) = &mut node.entry {
299 clear_ambiguous_mtime(entry, now);
300 OptEntry { 291 OptEntry {
301 state: entry.state.into(), 292 state: entry.state.into(),
302 mode: entry.mode.into(), 293 mode: entry.mode.into(),
303 mtime: entry.mtime.into(), 294 mtime: entry.mtime.into(),
304 size: entry.size.into(), 295 size: entry.size.into(),