Mercurial > hg
comparison rust/hg-core/src/dirstate_tree/dirstate_map.rs @ 47330:73f23e7610f8
dirstate-tree: Remove DirstateMap::iter_node_data_mut
In an upcoming changeset we want DirstateMap to be able to work directly
with nodes in their "on disk" representation, without always allocating
corresponding in-memory data structures. Nodes would have two possible
representations: one immutable "on disk" refering to the bytes buffer
of the contents of the .hg/dirstate file, and one mutable with HashMap
like the curren data structure.
These nodes would have copy-on-write semantics: when an immutable node
would need to be mutated, instead we allocate new mutable node for it and
its ancestors.
A mutable iterator of the entire tree would still be possible, but it would
become much more expensive since we’d need to allocate mutable nodes for
everything.
Instead, remove this iterator. It was only used to clear ambiguous mtimes
while serializing the `DirstateMap`. Instead clearing and serialization are
now two separate passes. Clearing first uses an immutable iterator to collect
the paths of nodes that need to be cleared, then accesses only those nodes
mutably.
Differential Revision: https://phab.mercurial-scm.org/D10744
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 13:15:00 +0200 |
parents | 2a9ddc8094c7 |
children | 0252600fd1cf |
comparison
equal
deleted
inserted
replaced
47329:717a94b423b9 | 47330:73f23e7610f8 |
---|---|
4 use std::convert::TryInto; | 4 use std::convert::TryInto; |
5 use std::path::PathBuf; | 5 use std::path::PathBuf; |
6 | 6 |
7 use super::on_disk; | 7 use super::on_disk; |
8 use super::path_with_basename::WithBasename; | 8 use super::path_with_basename::WithBasename; |
9 use crate::dirstate::parsers::clear_ambiguous_mtime; | |
10 use crate::dirstate::parsers::pack_entry; | 9 use crate::dirstate::parsers::pack_entry; |
11 use crate::dirstate::parsers::packed_entry_size; | 10 use crate::dirstate::parsers::packed_entry_size; |
12 use crate::dirstate::parsers::parse_dirstate_entries; | 11 use crate::dirstate::parsers::parse_dirstate_entries; |
13 use crate::dirstate::parsers::Timestamp; | 12 use crate::dirstate::parsers::Timestamp; |
14 use crate::matchers::Matcher; | 13 use crate::matchers::Matcher; |
76 // https://github.com/rust-lang/rust/issues/34162 | 75 // https://github.com/rust-lang/rust/issues/34162 |
77 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2)); | 76 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2)); |
78 vec | 77 vec |
79 } | 78 } |
80 } | 79 } |
81 | |
82 /// `(full_path, entry, copy_source)` | |
83 type NodeDataMut<'tree, 'on_disk> = ( | |
84 &'tree HgPath, | |
85 &'tree mut Option<DirstateEntry>, | |
86 &'tree mut Option<Cow<'on_disk, HgPath>>, | |
87 ); | |
88 | 80 |
89 impl<'on_disk> DirstateMap<'on_disk> { | 81 impl<'on_disk> DirstateMap<'on_disk> { |
90 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { | 82 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { |
91 Self { | 83 Self { |
92 on_disk, | 84 on_disk, |
250 node.entry = Some(new_entry) | 242 node.entry = Some(new_entry) |
251 } | 243 } |
252 | 244 |
253 fn iter_nodes<'a>( | 245 fn iter_nodes<'a>( |
254 &'a self, | 246 &'a self, |
255 ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a { | 247 ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a { |
256 // Depth first tree traversal. | 248 // Depth first tree traversal. |
257 // | 249 // |
258 // If we could afford internal iteration and recursion, | 250 // If we could afford internal iteration and recursion, |
259 // this would look like: | 251 // this would look like: |
260 // | 252 // |
277 std::iter::from_fn(move || { | 269 std::iter::from_fn(move || { |
278 while let Some((key, child_node)) = iter.next() { | 270 while let Some((key, child_node)) = iter.next() { |
279 // Pseudo-recursion | 271 // Pseudo-recursion |
280 let new_iter = child_node.children.iter(); | 272 let new_iter = child_node.children.iter(); |
281 let old_iter = std::mem::replace(&mut iter, new_iter); | 273 let old_iter = std::mem::replace(&mut iter, new_iter); |
282 let key = &**key.full_path(); | 274 let key = key.full_path(); |
283 stack.push((key, child_node, old_iter)); | 275 stack.push((key, child_node, old_iter)); |
284 } | 276 } |
285 // Found the end of a `children.iter()` iterator. | 277 // Found the end of a `children.iter()` iterator. |
286 if let Some((key, child_node, next_iter)) = stack.pop() { | 278 if let Some((key, child_node, next_iter)) = stack.pop() { |
287 // "Return" from pseudo-recursion by restoring state from the | 279 // "Return" from pseudo-recursion by restoring state from the |
294 None | 286 None |
295 } | 287 } |
296 }) | 288 }) |
297 } | 289 } |
298 | 290 |
299 /// Mutable iterator for the `(entry, copy source)` of each node. | 291 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) { |
300 /// | 292 for path in paths { |
301 /// It would not be safe to yield mutable references to nodes themeselves | 293 if let Some(node) = |
302 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are | 294 Self::get_node_mut(&mut self.root, path.as_ref()) |
303 /// reachable from their ancestor nodes, potentially creating multiple | 295 { |
304 /// `&mut` references to a given node. | 296 if let Some(entry) = node.entry.as_mut() { |
305 fn iter_node_data_mut<'tree>( | 297 entry.clear_mtime(); |
306 &'tree mut self, | 298 } |
307 ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree { | 299 } |
308 // Explict stack for pseudo-recursion, see `iter_nodes` above. | 300 } |
309 let mut stack = Vec::new(); | |
310 let mut iter = self.root.iter_mut(); | |
311 std::iter::from_fn(move || { | |
312 while let Some((key, child_node)) = iter.next() { | |
313 // Pseudo-recursion | |
314 let data = ( | |
315 &**key.full_path(), | |
316 &mut child_node.entry, | |
317 &mut child_node.copy_source, | |
318 ); | |
319 let new_iter = child_node.children.iter_mut(); | |
320 let old_iter = std::mem::replace(&mut iter, new_iter); | |
321 stack.push((data, old_iter)); | |
322 } | |
323 // Found the end of a `children.values_mut()` iterator. | |
324 if let Some((data, next_iter)) = stack.pop() { | |
325 // "Return" from pseudo-recursion by restoring state from the | |
326 // explicit stack | |
327 iter = next_iter; | |
328 | |
329 Some(data) | |
330 } else { | |
331 // Reached the bottom of the stack, we’re done | |
332 None | |
333 } | |
334 }) | |
335 } | 301 } |
336 } | 302 } |
337 | 303 |
338 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> { | 304 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> { |
339 fn clear(&mut self) { | 305 fn clear(&mut self) { |
425 | 391 |
426 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { | 392 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { |
427 for filename in filenames { | 393 for filename in filenames { |
428 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) { | 394 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) { |
429 if let Some(entry) = node.entry.as_mut() { | 395 if let Some(entry) = node.entry.as_mut() { |
430 clear_ambiguous_mtime(entry, now); | 396 entry.clear_ambiguous_mtime(now); |
431 } | 397 } |
432 } | 398 } |
433 } | 399 } |
434 } | 400 } |
435 | 401 |
451 node.entry | 417 node.entry |
452 .as_ref() | 418 .as_ref() |
453 .filter(|entry| { | 419 .filter(|entry| { |
454 entry.is_non_normal() || entry.is_from_other_parent() | 420 entry.is_non_normal() || entry.is_from_other_parent() |
455 }) | 421 }) |
456 .map(|_| path) | 422 .map(|_| &**path) |
457 })) | 423 })) |
458 } | 424 } |
459 | 425 |
460 fn set_non_normal_other_parent_entries(&mut self, _force: bool) { | 426 fn set_non_normal_other_parent_entries(&mut self, _force: bool) { |
461 // Do nothing, this `DirstateMap` does not have a separate "non normal | 427 // Do nothing, this `DirstateMap` does not have a separate "non normal |
473 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { | 439 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { |
474 Box::new(self.iter_nodes().filter_map(|(path, node)| { | 440 Box::new(self.iter_nodes().filter_map(|(path, node)| { |
475 node.entry | 441 node.entry |
476 .as_ref() | 442 .as_ref() |
477 .filter(|entry| entry.is_non_normal()) | 443 .filter(|entry| entry.is_non_normal()) |
478 .map(|_| path) | 444 .map(|_| &**path) |
479 })) | 445 })) |
480 } | 446 } |
481 | 447 |
482 fn iter_other_parent_paths( | 448 fn iter_other_parent_paths( |
483 &mut self, | 449 &mut self, |
484 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { | 450 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { |
485 Box::new(self.iter_nodes().filter_map(|(path, node)| { | 451 Box::new(self.iter_nodes().filter_map(|(path, node)| { |
486 node.entry | 452 node.entry |
487 .as_ref() | 453 .as_ref() |
488 .filter(|entry| entry.is_from_other_parent()) | 454 .filter(|entry| entry.is_from_other_parent()) |
489 .map(|_| path) | 455 .map(|_| &**path) |
490 })) | 456 })) |
491 } | 457 } |
492 | 458 |
493 fn has_tracked_dir( | 459 fn has_tracked_dir( |
494 &mut self, | 460 &mut self, |
520 fn pack_v1( | 486 fn pack_v1( |
521 &mut self, | 487 &mut self, |
522 parents: DirstateParents, | 488 parents: DirstateParents, |
523 now: Timestamp, | 489 now: Timestamp, |
524 ) -> Result<Vec<u8>, DirstateError> { | 490 ) -> Result<Vec<u8>, DirstateError> { |
491 let now: i32 = now.0.try_into().expect("time overflow"); | |
492 let mut ambiguous_mtimes = Vec::new(); | |
525 // Optizimation (to be measured?): pre-compute size to avoid `Vec` | 493 // Optizimation (to be measured?): pre-compute size to avoid `Vec` |
526 // reallocations | 494 // reallocations |
527 let mut size = parents.as_bytes().len(); | 495 let mut size = parents.as_bytes().len(); |
528 for (path, node) in self.iter_nodes() { | 496 for (path, node) in self.iter_nodes() { |
529 if node.entry.is_some() { | 497 if let Some(entry) = &node.entry { |
530 size += packed_entry_size( | 498 size += packed_entry_size( |
531 path, | 499 path, |
532 node.copy_source.as_ref().map(|p| &**p), | 500 node.copy_source.as_ref().map(|p| &**p), |
533 ) | 501 ); |
534 } | 502 if entry.mtime_is_ambiguous(now) { |
535 } | 503 ambiguous_mtimes.push(path.clone()) |
504 } | |
505 } | |
506 } | |
507 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes); | |
536 | 508 |
537 let mut packed = Vec::with_capacity(size); | 509 let mut packed = Vec::with_capacity(size); |
538 packed.extend(parents.as_bytes()); | 510 packed.extend(parents.as_bytes()); |
539 | 511 |
540 let now: i32 = now.0.try_into().expect("time overflow"); | 512 for (path, node) in self.iter_nodes() { |
541 for (path, opt_entry, copy_source) in self.iter_node_data_mut() { | 513 if let Some(entry) = &node.entry { |
542 if let Some(entry) = opt_entry { | |
543 clear_ambiguous_mtime(entry, now); | |
544 pack_entry( | 514 pack_entry( |
545 path, | 515 path, |
546 entry, | 516 entry, |
547 copy_source.as_ref().map(|p| &**p), | 517 node.copy_source.as_ref().map(|p| &**p), |
548 &mut packed, | 518 &mut packed, |
549 ); | 519 ); |
550 } | 520 } |
551 } | 521 } |
552 Ok(packed) | 522 Ok(packed) |
556 fn pack_v2( | 526 fn pack_v2( |
557 &mut self, | 527 &mut self, |
558 parents: DirstateParents, | 528 parents: DirstateParents, |
559 now: Timestamp, | 529 now: Timestamp, |
560 ) -> Result<Vec<u8>, DirstateError> { | 530 ) -> Result<Vec<u8>, DirstateError> { |
561 on_disk::write(self, parents, now) | 531 // TODO: how do we want to handle this in 2038? |
532 let now: i32 = now.0.try_into().expect("time overflow"); | |
533 let mut paths = Vec::new(); | |
534 for (path, node) in self.iter_nodes() { | |
535 if let Some(entry) = &node.entry { | |
536 if entry.mtime_is_ambiguous(now) { | |
537 paths.push(path.clone()) | |
538 } | |
539 } | |
540 } | |
541 // Borrow of `self` ends here since we collect cloned paths | |
542 | |
543 self.clear_known_ambiguous_mtimes(&paths); | |
544 | |
545 on_disk::write(self, parents) | |
562 } | 546 } |
563 | 547 |
564 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { | 548 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { |
565 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that | 549 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that |
566 // needs to be recomputed | 550 // needs to be recomputed |
590 | 574 |
591 fn copy_map_iter(&self) -> CopyMapIter<'_> { | 575 fn copy_map_iter(&self) -> CopyMapIter<'_> { |
592 Box::new(self.iter_nodes().filter_map(|(path, node)| { | 576 Box::new(self.iter_nodes().filter_map(|(path, node)| { |
593 node.copy_source | 577 node.copy_source |
594 .as_ref() | 578 .as_ref() |
595 .map(|copy_source| (path, &**copy_source)) | 579 .map(|copy_source| (&**path, &**copy_source)) |
596 })) | 580 })) |
597 } | 581 } |
598 | 582 |
599 fn copy_map_contains_key(&self, key: &HgPath) -> bool { | 583 fn copy_map_contains_key(&self, key: &HgPath) -> bool { |
600 if let Some(node) = self.get_node(key) { | 584 if let Some(node) = self.get_node(key) { |
647 self.get_node(key)?.entry.as_ref() | 631 self.get_node(key)?.entry.as_ref() |
648 } | 632 } |
649 | 633 |
650 fn iter(&self) -> StateMapIter<'_> { | 634 fn iter(&self) -> StateMapIter<'_> { |
651 Box::new(self.iter_nodes().filter_map(|(path, node)| { | 635 Box::new(self.iter_nodes().filter_map(|(path, node)| { |
652 node.entry.as_ref().map(|entry| (path, entry)) | 636 node.entry.as_ref().map(|entry| (&**path, entry)) |
653 })) | 637 })) |
654 } | 638 } |
655 } | 639 } |