Mercurial > hg
comparison rust/hg-core/src/dirstate/status.rs @ 44528:c8891bca40fb
rust-status: add bare `hg status` support in hg-core
A lot of performance remains to be gained, most notably by doing more things
in parallel, but also by caching, by switching to another regex engine instead
of falling back to Python, etc.
I have measured on multiple repositories that this change, when in combination
with the next two patches, improves bare `hg status` performance, and has no
observable impact when falling back (because it does so early).
On the Netbeans repository:
C: 840ms
Rust+C: 556ms
Mozilla Central with the one pattern that causes a fallback removed:
C: 2.315s
Rust+C: 1.700s
Differential Revision: https://phab.mercurial-scm.org/D7929
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Fri, 17 Jan 2020 15:43:46 +0100 |
parents | 1debb5894b39 |
children | f96b28aa4b79 |
comparison
equal
deleted
inserted
replaced
44527:1debb5894b39 | 44528:c8891bca40fb |
---|---|
9 //! It is currently missing a lot of functionality compared to the Python one | 9 //! It is currently missing a lot of functionality compared to the Python one |
10 //! and will only be triggered in narrow cases. | 10 //! and will only be triggered in narrow cases. |
11 | 11 |
12 use crate::{ | 12 use crate::{ |
13 dirstate::SIZE_FROM_OTHER_PARENT, | 13 dirstate::SIZE_FROM_OTHER_PARENT, |
14 matchers::{Matcher, VisitChildrenSet}, | 14 filepatterns::PatternFileWarning, |
15 matchers::{get_ignore_function, Matcher, VisitChildrenSet}, | |
15 utils::{ | 16 utils::{ |
16 files::HgMetadata, | 17 files::{find_dirs, HgMetadata}, |
17 hg_path::{ | 18 hg_path::{ |
18 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf, | 19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf, |
20 HgPathError, | |
19 }, | 21 }, |
22 path_auditor::PathAuditor, | |
20 }, | 23 }, |
21 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap, | 24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap, |
25 PatternError, | |
22 }; | 26 }; |
27 use lazy_static::lazy_static; | |
23 use rayon::prelude::*; | 28 use rayon::prelude::*; |
24 use std::borrow::Cow; | 29 use std::collections::VecDeque; |
25 use std::collections::{HashSet, VecDeque}; | 30 use std::{ |
26 use std::fs::{read_dir, DirEntry}; | 31 borrow::Cow, |
27 use std::io::ErrorKind; | 32 collections::HashSet, |
28 use std::ops::Deref; | 33 fs::{read_dir, DirEntry}, |
29 use std::path::Path; | 34 io::ErrorKind, |
35 ops::Deref, | |
36 path::Path, | |
37 }; | |
30 | 38 |
31 /// Wrong type of file from a `BadMatch` | 39 /// Wrong type of file from a `BadMatch` |
32 /// Note: a lot of those don't exist on all platforms. | 40 /// Note: a lot of those don't exist on all platforms. |
33 #[derive(Debug)] | 41 #[derive(Debug)] |
34 pub enum BadType { | 42 pub enum BadType { |
48 } | 56 } |
49 | 57 |
50 /// Marker enum used to dispatch new status entries into the right collections. | 58 /// Marker enum used to dispatch new status entries into the right collections. |
51 /// Is similar to `crate::EntryState`, but represents the transient state of | 59 /// Is similar to `crate::EntryState`, but represents the transient state of |
52 /// entries during the lifetime of a command. | 60 /// entries during the lifetime of a command. |
61 #[derive(Debug)] | |
53 enum Dispatch { | 62 enum Dispatch { |
54 Unsure, | 63 Unsure, |
55 Modified, | 64 Modified, |
56 Added, | 65 Added, |
57 Removed, | 66 Removed, |
153 // miss such a racy file change. | 162 // miss such a racy file change. |
154 Dispatch::Unsure | 163 Dispatch::Unsure |
155 } else if options.list_clean { | 164 } else if options.list_clean { |
156 Dispatch::Clean | 165 Dispatch::Clean |
157 } else { | 166 } else { |
158 Dispatch::Unknown | 167 Dispatch::None |
159 } | 168 } |
160 } | 169 } |
161 EntryState::Merged => Dispatch::Modified, | 170 EntryState::Merged => Dispatch::Modified, |
162 EntryState::Added => Dispatch::Added, | 171 EntryState::Added => Dispatch::Added, |
163 EntryState::Removed => Dispatch::Removed, | 172 EntryState::Removed => Dispatch::Removed, |
177 // File is unknown to Mercurial, everything is normal | 186 // File is unknown to Mercurial, everything is normal |
178 EntryState::Unknown => Dispatch::Unknown, | 187 EntryState::Unknown => Dispatch::Unknown, |
179 } | 188 } |
180 } | 189 } |
181 | 190 |
191 lazy_static! { | |
192 static ref DEFAULT_WORK: HashSet<&'static HgPath> = { | |
193 let mut h = HashSet::new(); | |
194 h.insert(HgPath::new(b"")); | |
195 h | |
196 }; | |
197 } | |
198 | |
182 /// Get stat data about the files explicitly specified by match. | 199 /// Get stat data about the files explicitly specified by match. |
183 /// TODO subrepos | 200 /// TODO subrepos |
184 fn walk_explicit<'a>( | 201 fn walk_explicit<'a>( |
185 files: &'a HashSet<&HgPath>, | 202 files: Option<&'a HashSet<&HgPath>>, |
186 dmap: &'a DirstateMap, | 203 dmap: &'a DirstateMap, |
187 root_dir: impl AsRef<Path> + Sync + Send, | 204 root_dir: impl AsRef<Path> + Sync + Send + 'a, |
188 options: StatusOptions, | 205 options: StatusOptions, |
189 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> { | 206 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> { |
190 files.par_iter().filter_map(move |filename| { | 207 files |
191 // TODO normalization | 208 .unwrap_or(&DEFAULT_WORK) |
192 let normalized = filename.as_ref(); | 209 .par_iter() |
193 | 210 .map(move |filename| { |
194 let buf = match hg_path_to_path_buf(normalized) { | 211 // TODO normalization |
195 Ok(x) => x, | 212 let normalized = filename.as_ref(); |
196 Err(e) => return Some(Err(e.into())), | 213 |
197 }; | 214 let buf = match hg_path_to_path_buf(normalized) { |
198 let target = root_dir.as_ref().join(buf); | 215 Ok(x) => x, |
199 let st = target.symlink_metadata(); | 216 Err(e) => return Some(Err(e.into())), |
200 match st { | 217 }; |
201 Ok(meta) => { | 218 let target = root_dir.as_ref().join(buf); |
202 let file_type = meta.file_type(); | 219 let st = target.symlink_metadata(); |
203 if file_type.is_file() || file_type.is_symlink() { | 220 let in_dmap = dmap.get(normalized); |
204 if let Some(entry) = dmap.get(normalized) { | 221 match st { |
222 Ok(meta) => { | |
223 let file_type = meta.file_type(); | |
224 return if file_type.is_file() || file_type.is_symlink() { | |
225 if let Some(entry) = in_dmap { | |
226 return Some(Ok(( | |
227 normalized, | |
228 dispatch_found( | |
229 &normalized, | |
230 *entry, | |
231 HgMetadata::from_metadata(meta), | |
232 &dmap.copy_map, | |
233 options, | |
234 ), | |
235 ))); | |
236 } | |
237 Some(Ok((normalized, Dispatch::Unknown))) | |
238 } else { | |
239 if file_type.is_dir() { | |
240 Some(Ok(( | |
241 normalized, | |
242 Dispatch::Directory { | |
243 was_file: in_dmap.is_some(), | |
244 }, | |
245 ))) | |
246 } else { | |
247 Some(Ok(( | |
248 normalized, | |
249 Dispatch::Bad(BadMatch::BadType( | |
250 // TODO do more than unknown | |
251 // Support for all `BadType` variant | |
252 // varies greatly between platforms. | |
253 // So far, no tests check the type and | |
254 // this should be good enough for most | |
255 // users. | |
256 BadType::Unknown, | |
257 )), | |
258 ))) | |
259 } | |
260 }; | |
261 } | |
262 Err(_) => { | |
263 if let Some(entry) = in_dmap { | |
205 return Some(Ok(( | 264 return Some(Ok(( |
206 normalized, | 265 normalized, |
207 dispatch_found( | 266 dispatch_missing(entry.state), |
208 &normalized, | |
209 *entry, | |
210 HgMetadata::from_metadata(meta), | |
211 &dmap.copy_map, | |
212 options, | |
213 ), | |
214 ))); | 267 ))); |
215 } | 268 } |
216 } else { | 269 } |
217 if dmap.contains_key(normalized) { | 270 }; |
218 return Some(Ok((normalized, Dispatch::Removed))); | 271 None |
219 } | 272 }) |
220 } | 273 .flatten() |
221 } | |
222 Err(_) => { | |
223 if let Some(entry) = dmap.get(normalized) { | |
224 return Some(Ok(( | |
225 normalized, | |
226 dispatch_missing(entry.state), | |
227 ))); | |
228 } | |
229 } | |
230 }; | |
231 None | |
232 }) | |
233 } | 274 } |
234 | 275 |
235 #[derive(Debug, Copy, Clone)] | 276 #[derive(Debug, Copy, Clone)] |
236 pub struct StatusOptions { | 277 pub struct StatusOptions { |
237 /// Remember the most recent modification timeslot for status, to make | 278 /// Remember the most recent modification timeslot for status, to make |
413 new_results.extend(old_results.into_iter()); | 454 new_results.extend(old_results.into_iter()); |
414 | 455 |
415 Ok(new_results) | 456 Ok(new_results) |
416 } | 457 } |
417 | 458 |
418 /// Stat all entries in the `DirstateMap` and mark them for dispatch into | 459 /// Stat all entries in the `DirstateMap` and mark them for dispatch. |
419 /// the relevant collections. | |
420 fn stat_dmap_entries( | 460 fn stat_dmap_entries( |
421 dmap: &DirstateMap, | 461 dmap: &DirstateMap, |
422 root_dir: impl AsRef<Path> + Sync + Send, | 462 root_dir: impl AsRef<Path> + Sync + Send, |
423 options: StatusOptions, | 463 options: StatusOptions, |
424 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> { | 464 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> { |
443 &dmap.copy_map, | 483 &dmap.copy_map, |
444 options, | 484 options, |
445 ), | 485 ), |
446 )), | 486 )), |
447 Err(ref e) | 487 Err(ref e) |
448 if e.kind() == std::io::ErrorKind::NotFound | 488 if e.kind() == ErrorKind::NotFound |
449 || e.raw_os_error() == Some(20) => | 489 || e.raw_os_error() == Some(20) => |
450 { | 490 { |
451 // Rust does not yet have an `ErrorKind` for | 491 // Rust does not yet have an `ErrorKind` for |
452 // `NotADirectory` (errno 20) | 492 // `NotADirectory` (errno 20) |
453 // It happens if the dirstate contains `foo/bar` and | 493 // It happens if the dirstate contains `foo/bar` and |
458 } | 498 } |
459 }) | 499 }) |
460 } | 500 } |
461 | 501 |
462 pub struct DirstateStatus<'a> { | 502 pub struct DirstateStatus<'a> { |
463 pub modified: Vec<&'a HgPath>, | 503 pub modified: Vec<Cow<'a, HgPath>>, |
464 pub added: Vec<&'a HgPath>, | 504 pub added: Vec<Cow<'a, HgPath>>, |
465 pub removed: Vec<&'a HgPath>, | 505 pub removed: Vec<Cow<'a, HgPath>>, |
466 pub deleted: Vec<&'a HgPath>, | 506 pub deleted: Vec<Cow<'a, HgPath>>, |
467 pub clean: Vec<&'a HgPath>, | 507 pub clean: Vec<Cow<'a, HgPath>>, |
468 pub ignored: Vec<&'a HgPath>, | 508 pub ignored: Vec<Cow<'a, HgPath>>, |
469 pub unknown: Vec<&'a HgPath>, | 509 pub unknown: Vec<Cow<'a, HgPath>>, |
470 pub bad: Vec<(&'a HgPath, BadMatch)>, | 510 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>, |
471 } | 511 } |
472 | 512 |
473 fn build_response<'a>( | 513 fn build_response<'a>( |
474 results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>, | 514 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>, |
475 ) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> { | 515 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) { |
476 let mut lookup = vec![]; | 516 let mut lookup = vec![]; |
477 let mut modified = vec![]; | 517 let mut modified = vec![]; |
478 let mut added = vec![]; | 518 let mut added = vec![]; |
479 let mut removed = vec![]; | 519 let mut removed = vec![]; |
480 let mut deleted = vec![]; | 520 let mut deleted = vec![]; |
481 let mut clean = vec![]; | 521 let mut clean = vec![]; |
482 let mut ignored = vec![]; | 522 let mut ignored = vec![]; |
483 let mut unknown = vec![]; | 523 let mut unknown = vec![]; |
484 let mut bad = vec![]; | 524 let mut bad = vec![]; |
485 | 525 |
486 for res in results.into_iter() { | 526 for (filename, dispatch) in results.into_iter() { |
487 let (filename, dispatch) = res?; | |
488 match dispatch { | 527 match dispatch { |
489 Dispatch::Unknown => unknown.push(filename), | 528 Dispatch::Unknown => unknown.push(filename), |
490 Dispatch::Unsure => lookup.push(filename), | 529 Dispatch::Unsure => lookup.push(filename), |
491 Dispatch::Modified => modified.push(filename), | 530 Dispatch::Modified => modified.push(filename), |
492 Dispatch::Added => added.push(filename), | 531 Dispatch::Added => added.push(filename), |
498 Dispatch::Bad(reason) => bad.push((filename, reason)), | 537 Dispatch::Bad(reason) => bad.push((filename, reason)), |
499 Dispatch::Directory { .. } => {} | 538 Dispatch::Directory { .. } => {} |
500 } | 539 } |
501 } | 540 } |
502 | 541 |
503 Ok(( | 542 ( |
504 lookup, | 543 lookup, |
505 DirstateStatus { | 544 DirstateStatus { |
506 modified, | 545 modified, |
507 added, | 546 added, |
508 removed, | 547 removed, |
510 clean, | 549 clean, |
511 ignored, | 550 ignored, |
512 unknown, | 551 unknown, |
513 bad, | 552 bad, |
514 }, | 553 }, |
515 )) | 554 ) |
516 } | 555 } |
517 | 556 |
557 pub enum StatusError { | |
558 IO(std::io::Error), | |
559 Path(HgPathError), | |
560 Pattern(PatternError), | |
561 } | |
562 | |
563 pub type StatusResult<T> = Result<T, StatusError>; | |
564 | |
565 impl From<PatternError> for StatusError { | |
566 fn from(e: PatternError) -> Self { | |
567 StatusError::Pattern(e) | |
568 } | |
569 } | |
570 impl From<HgPathError> for StatusError { | |
571 fn from(e: HgPathError) -> Self { | |
572 StatusError::Path(e) | |
573 } | |
574 } | |
575 impl From<std::io::Error> for StatusError { | |
576 fn from(e: std::io::Error) -> Self { | |
577 StatusError::IO(e) | |
578 } | |
579 } | |
580 | |
581 impl ToString for StatusError { | |
582 fn to_string(&self) -> String { | |
583 match self { | |
584 StatusError::IO(e) => e.to_string(), | |
585 StatusError::Path(e) => e.to_string(), | |
586 StatusError::Pattern(e) => e.to_string(), | |
587 } | |
588 } | |
589 } | |
590 | |
591 /// Get the status of files in the working directory. | |
592 /// | |
593 /// This is the current entry-point for `hg-core` and is realistically unusable | |
594 /// outside of a Python context because its arguments need to provide a lot of | |
595 /// information that will not be necessary in the future. | |
518 pub fn status<'a: 'c, 'b: 'c, 'c>( | 596 pub fn status<'a: 'c, 'b: 'c, 'c>( |
519 dmap: &'a DirstateMap, | 597 dmap: &'a DirstateMap, |
520 matcher: &'b impl Matcher, | 598 matcher: &'b (impl Matcher + Sync), |
521 root_dir: impl AsRef<Path> + Sync + Send + Copy, | 599 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c, |
600 ignore_files: &[impl AsRef<Path> + 'c], | |
522 options: StatusOptions, | 601 options: StatusOptions, |
523 ) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> { | 602 ) -> StatusResult<( |
603 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>), | |
604 Vec<PatternFileWarning>, | |
605 )> { | |
606 let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?; | |
607 | |
608 // Is the path or one of its ancestors ignored? | |
609 let dir_ignore_fn = |dir: &_| { | |
610 if ignore_fn(dir) { | |
611 true | |
612 } else { | |
613 for p in find_dirs(dir) { | |
614 if ignore_fn(p) { | |
615 return true; | |
616 } | |
617 } | |
618 false | |
619 } | |
620 }; | |
621 | |
524 let files = matcher.file_set(); | 622 let files = matcher.file_set(); |
525 let mut results = vec![]; | 623 |
526 if let Some(files) = files { | 624 // Step 1: check the files explicitly mentioned by the user |
527 results.par_extend(walk_explicit(&files, &dmap, root_dir, options)); | 625 let explicit = walk_explicit(files, &dmap, root_dir, options); |
626 let (work, mut results): (Vec<_>, FastHashMap<_, _>) = explicit | |
627 .filter_map(Result::ok) | |
628 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)) | |
629 .partition(|(_, dispatch)| match dispatch { | |
630 Dispatch::Directory { .. } => true, | |
631 _ => false, | |
632 }); | |
633 | |
634 // Step 2: recursively check the working directory for changes if needed | |
635 for (dir, dispatch) in work { | |
636 match dispatch { | |
637 Dispatch::Directory { was_file } => { | |
638 if was_file { | |
639 results.insert(dir.to_owned(), Dispatch::Removed); | |
640 } | |
641 if options.list_ignored | |
642 || options.list_unknown && !dir_ignore_fn(&dir) | |
643 { | |
644 results = traverse( | |
645 matcher, | |
646 root_dir, | |
647 &dmap, | |
648 &dir, | |
649 results, | |
650 &ignore_fn, | |
651 &dir_ignore_fn, | |
652 options, | |
653 )?; | |
654 } | |
655 } | |
656 _ => unreachable!("There can only be directories in `work`"), | |
657 } | |
528 } | 658 } |
529 | 659 |
530 if !matcher.is_exact() { | 660 if !matcher.is_exact() { |
531 let stat_results = stat_dmap_entries(&dmap, root_dir, options); | 661 // Step 3: Check the remaining files from the dmap. |
532 results.par_extend(stat_results); | 662 // If a dmap file is not in results yet, it was either |
533 } | 663 // a) not matched b) ignored, c) missing, or d) under a |
534 | 664 // symlink directory. |
535 build_response(results) | 665 |
536 } | 666 if options.list_unknown { |
667 let to_visit: Box<dyn Iterator<Item = (&HgPath, &DirstateEntry)>> = | |
668 if results.is_empty() && matcher.matches_everything() { | |
669 Box::new(dmap.iter().map(|(f, e)| (f.deref(), e))) | |
670 } else { | |
671 Box::new(dmap.iter().filter_map(|(f, e)| { | |
672 if !results.contains_key(f.deref()) | |
673 && matcher.matches(f) | |
674 { | |
675 Some((f.deref(), e)) | |
676 } else { | |
677 None | |
678 } | |
679 })) | |
680 }; | |
681 let mut to_visit: Vec<_> = to_visit.collect(); | |
682 to_visit.sort_by(|a, b| a.0.cmp(&b.0)); | |
683 | |
684 // We walked all dirs under the roots that weren't ignored, and | |
685 // everything that matched was stat'ed and is already in results. | |
686 // The rest must thus be ignored or under a symlink. | |
687 let mut path_auditor = PathAuditor::new(root_dir); | |
688 | |
689 for (ref filename, entry) in to_visit { | |
690 // Report ignored items in the dmap as long as they are not | |
691 // under a symlink directory. | |
692 if path_auditor.check(filename) { | |
693 // TODO normalize for case-insensitive filesystems | |
694 let buf = hg_path_to_path_buf(filename)?; | |
695 results.insert( | |
696 Cow::Borrowed(filename), | |
697 match root_dir.as_ref().join(&buf).symlink_metadata() { | |
698 // File was just ignored, no links, and exists | |
699 Ok(meta) => { | |
700 let metadata = HgMetadata::from_metadata(meta); | |
701 dispatch_found( | |
702 filename, | |
703 *entry, | |
704 metadata, | |
705 &dmap.copy_map, | |
706 options, | |
707 ) | |
708 } | |
709 // File doesn't exist | |
710 Err(_) => dispatch_missing(entry.state), | |
711 }, | |
712 ); | |
713 } else { | |
714 // It's either missing or under a symlink directory which | |
715 // we, in this case, report as missing. | |
716 results.insert( | |
717 Cow::Borrowed(filename), | |
718 dispatch_missing(entry.state), | |
719 ); | |
720 } | |
721 } | |
722 } else { | |
723 // We may not have walked the full directory tree above, so stat | |
724 // and check everything we missed. | |
725 let stat_results = stat_dmap_entries(&dmap, root_dir, options); | |
726 results.par_extend(stat_results.flatten().map( | |
727 |(filename, dispatch)| (Cow::Borrowed(filename), dispatch), | |
728 )); | |
729 } | |
730 } | |
731 | |
732 let results = results.into_iter().filter_map(|(filename, dispatch)| { | |
733 match dispatch { | |
734 Dispatch::Bad(_) => return Some((filename, dispatch)), | |
735 _ => {} | |
736 }; | |
737 // TODO do this in //, not at the end | |
738 if !dmap.contains_key(filename.deref()) { | |
739 if (options.list_ignored || matcher.exact_match(&filename)) | |
740 && dir_ignore_fn(&filename) | |
741 { | |
742 if options.list_ignored { | |
743 return Some((filename.to_owned(), Dispatch::Ignored)); | |
744 } | |
745 } else { | |
746 if !ignore_fn(&filename) { | |
747 return Some((filename.to_owned(), Dispatch::Unknown)); | |
748 } | |
749 } | |
750 return None; | |
751 } | |
752 Some((filename, dispatch)) | |
753 }); | |
754 | |
755 Ok((build_response(results), warnings)) | |
756 } |