comparison rust/hg-core/src/dirstate_tree/dirstate_map.rs @ 47330:73f23e7610f8

dirstate-tree: Remove DirstateMap::iter_node_data_mut In an upcoming changeset we want DirstateMap to be able to work directly with nodes in their "on disk" representation, without always allocating corresponding in-memory data structures. Nodes would have two possible representations: one immutable "on disk" refering to the bytes buffer of the contents of the .hg/dirstate file, and one mutable with HashMap like the curren data structure. These nodes would have copy-on-write semantics: when an immutable node would need to be mutated, instead we allocate new mutable node for it and its ancestors. A mutable iterator of the entire tree would still be possible, but it would become much more expensive since we’d need to allocate mutable nodes for everything. Instead, remove this iterator. It was only used to clear ambiguous mtimes while serializing the `DirstateMap`. Instead clearing and serialization are now two separate passes. Clearing first uses an immutable iterator to collect the paths of nodes that need to be cleared, then accesses only those nodes mutably. Differential Revision: https://phab.mercurial-scm.org/D10744
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 19 May 2021 13:15:00 +0200
parents 2a9ddc8094c7
children 0252600fd1cf
comparison
equal deleted inserted replaced
47329:717a94b423b9 47330:73f23e7610f8
4 use std::convert::TryInto; 4 use std::convert::TryInto;
5 use std::path::PathBuf; 5 use std::path::PathBuf;
6 6
7 use super::on_disk; 7 use super::on_disk;
8 use super::path_with_basename::WithBasename; 8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::clear_ambiguous_mtime;
10 use crate::dirstate::parsers::pack_entry; 9 use crate::dirstate::parsers::pack_entry;
11 use crate::dirstate::parsers::packed_entry_size; 10 use crate::dirstate::parsers::packed_entry_size;
12 use crate::dirstate::parsers::parse_dirstate_entries; 11 use crate::dirstate::parsers::parse_dirstate_entries;
13 use crate::dirstate::parsers::Timestamp; 12 use crate::dirstate::parsers::Timestamp;
14 use crate::matchers::Matcher; 13 use crate::matchers::Matcher;
76 // https://github.com/rust-lang/rust/issues/34162 75 // https://github.com/rust-lang/rust/issues/34162
77 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2)); 76 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
78 vec 77 vec
79 } 78 }
80 } 79 }
81
82 /// `(full_path, entry, copy_source)`
83 type NodeDataMut<'tree, 'on_disk> = (
84 &'tree HgPath,
85 &'tree mut Option<DirstateEntry>,
86 &'tree mut Option<Cow<'on_disk, HgPath>>,
87 );
88 80
89 impl<'on_disk> DirstateMap<'on_disk> { 81 impl<'on_disk> DirstateMap<'on_disk> {
90 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { 82 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
91 Self { 83 Self {
92 on_disk, 84 on_disk,
250 node.entry = Some(new_entry) 242 node.entry = Some(new_entry)
251 } 243 }
252 244
253 fn iter_nodes<'a>( 245 fn iter_nodes<'a>(
254 &'a self, 246 &'a self,
255 ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a { 247 ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
256 // Depth first tree traversal. 248 // Depth first tree traversal.
257 // 249 //
258 // If we could afford internal iteration and recursion, 250 // If we could afford internal iteration and recursion,
259 // this would look like: 251 // this would look like:
260 // 252 //
277 std::iter::from_fn(move || { 269 std::iter::from_fn(move || {
278 while let Some((key, child_node)) = iter.next() { 270 while let Some((key, child_node)) = iter.next() {
279 // Pseudo-recursion 271 // Pseudo-recursion
280 let new_iter = child_node.children.iter(); 272 let new_iter = child_node.children.iter();
281 let old_iter = std::mem::replace(&mut iter, new_iter); 273 let old_iter = std::mem::replace(&mut iter, new_iter);
282 let key = &**key.full_path(); 274 let key = key.full_path();
283 stack.push((key, child_node, old_iter)); 275 stack.push((key, child_node, old_iter));
284 } 276 }
285 // Found the end of a `children.iter()` iterator. 277 // Found the end of a `children.iter()` iterator.
286 if let Some((key, child_node, next_iter)) = stack.pop() { 278 if let Some((key, child_node, next_iter)) = stack.pop() {
287 // "Return" from pseudo-recursion by restoring state from the 279 // "Return" from pseudo-recursion by restoring state from the
294 None 286 None
295 } 287 }
296 }) 288 })
297 } 289 }
298 290
299 /// Mutable iterator for the `(entry, copy source)` of each node. 291 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
300 /// 292 for path in paths {
301 /// It would not be safe to yield mutable references to nodes themeselves 293 if let Some(node) =
302 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are 294 Self::get_node_mut(&mut self.root, path.as_ref())
303 /// reachable from their ancestor nodes, potentially creating multiple 295 {
304 /// `&mut` references to a given node. 296 if let Some(entry) = node.entry.as_mut() {
305 fn iter_node_data_mut<'tree>( 297 entry.clear_mtime();
306 &'tree mut self, 298 }
307 ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree { 299 }
308 // Explict stack for pseudo-recursion, see `iter_nodes` above. 300 }
309 let mut stack = Vec::new();
310 let mut iter = self.root.iter_mut();
311 std::iter::from_fn(move || {
312 while let Some((key, child_node)) = iter.next() {
313 // Pseudo-recursion
314 let data = (
315 &**key.full_path(),
316 &mut child_node.entry,
317 &mut child_node.copy_source,
318 );
319 let new_iter = child_node.children.iter_mut();
320 let old_iter = std::mem::replace(&mut iter, new_iter);
321 stack.push((data, old_iter));
322 }
323 // Found the end of a `children.values_mut()` iterator.
324 if let Some((data, next_iter)) = stack.pop() {
325 // "Return" from pseudo-recursion by restoring state from the
326 // explicit stack
327 iter = next_iter;
328
329 Some(data)
330 } else {
331 // Reached the bottom of the stack, we’re done
332 None
333 }
334 })
335 } 301 }
336 } 302 }
337 303
338 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> { 304 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
339 fn clear(&mut self) { 305 fn clear(&mut self) {
425 391
426 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { 392 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
427 for filename in filenames { 393 for filename in filenames {
428 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) { 394 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
429 if let Some(entry) = node.entry.as_mut() { 395 if let Some(entry) = node.entry.as_mut() {
430 clear_ambiguous_mtime(entry, now); 396 entry.clear_ambiguous_mtime(now);
431 } 397 }
432 } 398 }
433 } 399 }
434 } 400 }
435 401
451 node.entry 417 node.entry
452 .as_ref() 418 .as_ref()
453 .filter(|entry| { 419 .filter(|entry| {
454 entry.is_non_normal() || entry.is_from_other_parent() 420 entry.is_non_normal() || entry.is_from_other_parent()
455 }) 421 })
456 .map(|_| path) 422 .map(|_| &**path)
457 })) 423 }))
458 } 424 }
459 425
460 fn set_non_normal_other_parent_entries(&mut self, _force: bool) { 426 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
461 // Do nothing, this `DirstateMap` does not have a separate "non normal 427 // Do nothing, this `DirstateMap` does not have a separate "non normal
473 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { 439 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
474 Box::new(self.iter_nodes().filter_map(|(path, node)| { 440 Box::new(self.iter_nodes().filter_map(|(path, node)| {
475 node.entry 441 node.entry
476 .as_ref() 442 .as_ref()
477 .filter(|entry| entry.is_non_normal()) 443 .filter(|entry| entry.is_non_normal())
478 .map(|_| path) 444 .map(|_| &**path)
479 })) 445 }))
480 } 446 }
481 447
482 fn iter_other_parent_paths( 448 fn iter_other_parent_paths(
483 &mut self, 449 &mut self,
484 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> { 450 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
485 Box::new(self.iter_nodes().filter_map(|(path, node)| { 451 Box::new(self.iter_nodes().filter_map(|(path, node)| {
486 node.entry 452 node.entry
487 .as_ref() 453 .as_ref()
488 .filter(|entry| entry.is_from_other_parent()) 454 .filter(|entry| entry.is_from_other_parent())
489 .map(|_| path) 455 .map(|_| &**path)
490 })) 456 }))
491 } 457 }
492 458
493 fn has_tracked_dir( 459 fn has_tracked_dir(
494 &mut self, 460 &mut self,
520 fn pack_v1( 486 fn pack_v1(
521 &mut self, 487 &mut self,
522 parents: DirstateParents, 488 parents: DirstateParents,
523 now: Timestamp, 489 now: Timestamp,
524 ) -> Result<Vec<u8>, DirstateError> { 490 ) -> Result<Vec<u8>, DirstateError> {
491 let now: i32 = now.0.try_into().expect("time overflow");
492 let mut ambiguous_mtimes = Vec::new();
525 // Optizimation (to be measured?): pre-compute size to avoid `Vec` 493 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
526 // reallocations 494 // reallocations
527 let mut size = parents.as_bytes().len(); 495 let mut size = parents.as_bytes().len();
528 for (path, node) in self.iter_nodes() { 496 for (path, node) in self.iter_nodes() {
529 if node.entry.is_some() { 497 if let Some(entry) = &node.entry {
530 size += packed_entry_size( 498 size += packed_entry_size(
531 path, 499 path,
532 node.copy_source.as_ref().map(|p| &**p), 500 node.copy_source.as_ref().map(|p| &**p),
533 ) 501 );
534 } 502 if entry.mtime_is_ambiguous(now) {
535 } 503 ambiguous_mtimes.push(path.clone())
504 }
505 }
506 }
507 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
536 508
537 let mut packed = Vec::with_capacity(size); 509 let mut packed = Vec::with_capacity(size);
538 packed.extend(parents.as_bytes()); 510 packed.extend(parents.as_bytes());
539 511
540 let now: i32 = now.0.try_into().expect("time overflow"); 512 for (path, node) in self.iter_nodes() {
541 for (path, opt_entry, copy_source) in self.iter_node_data_mut() { 513 if let Some(entry) = &node.entry {
542 if let Some(entry) = opt_entry {
543 clear_ambiguous_mtime(entry, now);
544 pack_entry( 514 pack_entry(
545 path, 515 path,
546 entry, 516 entry,
547 copy_source.as_ref().map(|p| &**p), 517 node.copy_source.as_ref().map(|p| &**p),
548 &mut packed, 518 &mut packed,
549 ); 519 );
550 } 520 }
551 } 521 }
552 Ok(packed) 522 Ok(packed)
556 fn pack_v2( 526 fn pack_v2(
557 &mut self, 527 &mut self,
558 parents: DirstateParents, 528 parents: DirstateParents,
559 now: Timestamp, 529 now: Timestamp,
560 ) -> Result<Vec<u8>, DirstateError> { 530 ) -> Result<Vec<u8>, DirstateError> {
561 on_disk::write(self, parents, now) 531 // TODO: how do we want to handle this in 2038?
532 let now: i32 = now.0.try_into().expect("time overflow");
533 let mut paths = Vec::new();
534 for (path, node) in self.iter_nodes() {
535 if let Some(entry) = &node.entry {
536 if entry.mtime_is_ambiguous(now) {
537 paths.push(path.clone())
538 }
539 }
540 }
541 // Borrow of `self` ends here since we collect cloned paths
542
543 self.clear_known_ambiguous_mtimes(&paths);
544
545 on_disk::write(self, parents)
562 } 546 }
563 547
564 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { 548 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
565 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that 549 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
566 // needs to be recomputed 550 // needs to be recomputed
590 574
591 fn copy_map_iter(&self) -> CopyMapIter<'_> { 575 fn copy_map_iter(&self) -> CopyMapIter<'_> {
592 Box::new(self.iter_nodes().filter_map(|(path, node)| { 576 Box::new(self.iter_nodes().filter_map(|(path, node)| {
593 node.copy_source 577 node.copy_source
594 .as_ref() 578 .as_ref()
595 .map(|copy_source| (path, &**copy_source)) 579 .map(|copy_source| (&**path, &**copy_source))
596 })) 580 }))
597 } 581 }
598 582
599 fn copy_map_contains_key(&self, key: &HgPath) -> bool { 583 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
600 if let Some(node) = self.get_node(key) { 584 if let Some(node) = self.get_node(key) {
647 self.get_node(key)?.entry.as_ref() 631 self.get_node(key)?.entry.as_ref()
648 } 632 }
649 633
650 fn iter(&self) -> StateMapIter<'_> { 634 fn iter(&self) -> StateMapIter<'_> {
651 Box::new(self.iter_nodes().filter_map(|(path, node)| { 635 Box::new(self.iter_nodes().filter_map(|(path, node)| {
652 node.entry.as_ref().map(|entry| (path, entry)) 636 node.entry.as_ref().map(|entry| (&**path, entry))
653 })) 637 }))
654 } 638 }
655 } 639 }