comparison rust/hg-core/src/dirstate/status.rs @ 44528:c8891bca40fb

rust-status: add bare `hg status` support in hg-core A lot of performance remains to be gained, most notably by doing more things in parallel, but also by caching, by switching to another regex engine instead of falling back to Python, etc. I have measured on multiple repositories that this change, in combination with the next two patches, improves bare `hg status` performance and has no observable impact when falling back (because it does so early). On the Netbeans repository: C: 840ms, Rust+C: 556ms. Mozilla Central with the one pattern that causes a fallback removed: C: 2.315s, Rust+C: 1.700s. Differential Revision: https://phab.mercurial-scm.org/D7929
author Raphaël Gomès <rgomes@octobus.net>
date Fri, 17 Jan 2020 15:43:46 +0100
parents 1debb5894b39
children f96b28aa4b79
comparison
equal deleted inserted replaced
44527:1debb5894b39 44528:c8891bca40fb
9 //! It is currently missing a lot of functionality compared to the Python one 9 //! It is currently missing a lot of functionality compared to the Python one
10 //! and will only be triggered in narrow cases. 10 //! and will only be triggered in narrow cases.
11 11
12 use crate::{ 12 use crate::{
13 dirstate::SIZE_FROM_OTHER_PARENT, 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 matchers::{Matcher, VisitChildrenSet}, 14 filepatterns::PatternFileWarning,
15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
15 utils::{ 16 utils::{
16 files::HgMetadata, 17 files::{find_dirs, HgMetadata},
17 hg_path::{ 18 hg_path::{
18 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf, 19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 HgPathError,
19 }, 21 },
22 path_auditor::PathAuditor,
20 }, 23 },
21 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap, 24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 PatternError,
22 }; 26 };
27 use lazy_static::lazy_static;
23 use rayon::prelude::*; 28 use rayon::prelude::*;
24 use std::borrow::Cow; 29 use std::collections::VecDeque;
25 use std::collections::{HashSet, VecDeque}; 30 use std::{
26 use std::fs::{read_dir, DirEntry}; 31 borrow::Cow,
27 use std::io::ErrorKind; 32 collections::HashSet,
28 use std::ops::Deref; 33 fs::{read_dir, DirEntry},
29 use std::path::Path; 34 io::ErrorKind,
35 ops::Deref,
36 path::Path,
37 };
30 38
31 /// Wrong type of file from a `BadMatch` 39 /// Wrong type of file from a `BadMatch`
32 /// Note: a lot of those don't exist on all platforms. 40 /// Note: a lot of those don't exist on all platforms.
33 #[derive(Debug)] 41 #[derive(Debug)]
34 pub enum BadType { 42 pub enum BadType {
48 } 56 }
49 57
50 /// Marker enum used to dispatch new status entries into the right collections. 58 /// Marker enum used to dispatch new status entries into the right collections.
51 /// Is similar to `crate::EntryState`, but represents the transient state of 59 /// Is similar to `crate::EntryState`, but represents the transient state of
52 /// entries during the lifetime of a command. 60 /// entries during the lifetime of a command.
61 #[derive(Debug)]
53 enum Dispatch { 62 enum Dispatch {
54 Unsure, 63 Unsure,
55 Modified, 64 Modified,
56 Added, 65 Added,
57 Removed, 66 Removed,
153 // miss such a racy file change. 162 // miss such a racy file change.
154 Dispatch::Unsure 163 Dispatch::Unsure
155 } else if options.list_clean { 164 } else if options.list_clean {
156 Dispatch::Clean 165 Dispatch::Clean
157 } else { 166 } else {
158 Dispatch::Unknown 167 Dispatch::None
159 } 168 }
160 } 169 }
161 EntryState::Merged => Dispatch::Modified, 170 EntryState::Merged => Dispatch::Modified,
162 EntryState::Added => Dispatch::Added, 171 EntryState::Added => Dispatch::Added,
163 EntryState::Removed => Dispatch::Removed, 172 EntryState::Removed => Dispatch::Removed,
177 // File is unknown to Mercurial, everything is normal 186 // File is unknown to Mercurial, everything is normal
178 EntryState::Unknown => Dispatch::Unknown, 187 EntryState::Unknown => Dispatch::Unknown,
179 } 188 }
180 } 189 }
181 190
191 lazy_static! {
192 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
193 let mut h = HashSet::new();
194 h.insert(HgPath::new(b""));
195 h
196 };
197 }
198
182 /// Get stat data about the files explicitly specified by match. 199 /// Get stat data about the files explicitly specified by match.
183 /// TODO subrepos 200 /// TODO subrepos
184 fn walk_explicit<'a>( 201 fn walk_explicit<'a>(
185 files: &'a HashSet<&HgPath>, 202 files: Option<&'a HashSet<&HgPath>>,
186 dmap: &'a DirstateMap, 203 dmap: &'a DirstateMap,
187 root_dir: impl AsRef<Path> + Sync + Send, 204 root_dir: impl AsRef<Path> + Sync + Send + 'a,
188 options: StatusOptions, 205 options: StatusOptions,
189 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> { 206 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
190 files.par_iter().filter_map(move |filename| { 207 files
191 // TODO normalization 208 .unwrap_or(&DEFAULT_WORK)
192 let normalized = filename.as_ref(); 209 .par_iter()
193 210 .map(move |filename| {
194 let buf = match hg_path_to_path_buf(normalized) { 211 // TODO normalization
195 Ok(x) => x, 212 let normalized = filename.as_ref();
196 Err(e) => return Some(Err(e.into())), 213
197 }; 214 let buf = match hg_path_to_path_buf(normalized) {
198 let target = root_dir.as_ref().join(buf); 215 Ok(x) => x,
199 let st = target.symlink_metadata(); 216 Err(e) => return Some(Err(e.into())),
200 match st { 217 };
201 Ok(meta) => { 218 let target = root_dir.as_ref().join(buf);
202 let file_type = meta.file_type(); 219 let st = target.symlink_metadata();
203 if file_type.is_file() || file_type.is_symlink() { 220 let in_dmap = dmap.get(normalized);
204 if let Some(entry) = dmap.get(normalized) { 221 match st {
222 Ok(meta) => {
223 let file_type = meta.file_type();
224 return if file_type.is_file() || file_type.is_symlink() {
225 if let Some(entry) = in_dmap {
226 return Some(Ok((
227 normalized,
228 dispatch_found(
229 &normalized,
230 *entry,
231 HgMetadata::from_metadata(meta),
232 &dmap.copy_map,
233 options,
234 ),
235 )));
236 }
237 Some(Ok((normalized, Dispatch::Unknown)))
238 } else {
239 if file_type.is_dir() {
240 Some(Ok((
241 normalized,
242 Dispatch::Directory {
243 was_file: in_dmap.is_some(),
244 },
245 )))
246 } else {
247 Some(Ok((
248 normalized,
249 Dispatch::Bad(BadMatch::BadType(
250 // TODO do more than unknown
251 // Support for all `BadType` variant
252 // varies greatly between platforms.
253 // So far, no tests check the type and
254 // this should be good enough for most
255 // users.
256 BadType::Unknown,
257 )),
258 )))
259 }
260 };
261 }
262 Err(_) => {
263 if let Some(entry) = in_dmap {
205 return Some(Ok(( 264 return Some(Ok((
206 normalized, 265 normalized,
207 dispatch_found( 266 dispatch_missing(entry.state),
208 &normalized,
209 *entry,
210 HgMetadata::from_metadata(meta),
211 &dmap.copy_map,
212 options,
213 ),
214 ))); 267 )));
215 } 268 }
216 } else { 269 }
217 if dmap.contains_key(normalized) { 270 };
218 return Some(Ok((normalized, Dispatch::Removed))); 271 None
219 } 272 })
220 } 273 .flatten()
221 }
222 Err(_) => {
223 if let Some(entry) = dmap.get(normalized) {
224 return Some(Ok((
225 normalized,
226 dispatch_missing(entry.state),
227 )));
228 }
229 }
230 };
231 None
232 })
233 } 274 }
234 275
235 #[derive(Debug, Copy, Clone)] 276 #[derive(Debug, Copy, Clone)]
236 pub struct StatusOptions { 277 pub struct StatusOptions {
237 /// Remember the most recent modification timeslot for status, to make 278 /// Remember the most recent modification timeslot for status, to make
413 new_results.extend(old_results.into_iter()); 454 new_results.extend(old_results.into_iter());
414 455
415 Ok(new_results) 456 Ok(new_results)
416 } 457 }
417 458
418 /// Stat all entries in the `DirstateMap` and mark them for dispatch into 459 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
419 /// the relevant collections.
420 fn stat_dmap_entries( 460 fn stat_dmap_entries(
421 dmap: &DirstateMap, 461 dmap: &DirstateMap,
422 root_dir: impl AsRef<Path> + Sync + Send, 462 root_dir: impl AsRef<Path> + Sync + Send,
423 options: StatusOptions, 463 options: StatusOptions,
424 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> { 464 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
443 &dmap.copy_map, 483 &dmap.copy_map,
444 options, 484 options,
445 ), 485 ),
446 )), 486 )),
447 Err(ref e) 487 Err(ref e)
448 if e.kind() == std::io::ErrorKind::NotFound 488 if e.kind() == ErrorKind::NotFound
449 || e.raw_os_error() == Some(20) => 489 || e.raw_os_error() == Some(20) =>
450 { 490 {
451 // Rust does not yet have an `ErrorKind` for 491 // Rust does not yet have an `ErrorKind` for
452 // `NotADirectory` (errno 20) 492 // `NotADirectory` (errno 20)
453 // It happens if the dirstate contains `foo/bar` and 493 // It happens if the dirstate contains `foo/bar` and
458 } 498 }
459 }) 499 })
460 } 500 }
461 501
462 pub struct DirstateStatus<'a> { 502 pub struct DirstateStatus<'a> {
463 pub modified: Vec<&'a HgPath>, 503 pub modified: Vec<Cow<'a, HgPath>>,
464 pub added: Vec<&'a HgPath>, 504 pub added: Vec<Cow<'a, HgPath>>,
465 pub removed: Vec<&'a HgPath>, 505 pub removed: Vec<Cow<'a, HgPath>>,
466 pub deleted: Vec<&'a HgPath>, 506 pub deleted: Vec<Cow<'a, HgPath>>,
467 pub clean: Vec<&'a HgPath>, 507 pub clean: Vec<Cow<'a, HgPath>>,
468 pub ignored: Vec<&'a HgPath>, 508 pub ignored: Vec<Cow<'a, HgPath>>,
469 pub unknown: Vec<&'a HgPath>, 509 pub unknown: Vec<Cow<'a, HgPath>>,
470 pub bad: Vec<(&'a HgPath, BadMatch)>, 510 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
471 } 511 }
472 512
473 fn build_response<'a>( 513 fn build_response<'a>(
474 results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>, 514 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
475 ) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> { 515 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
476 let mut lookup = vec![]; 516 let mut lookup = vec![];
477 let mut modified = vec![]; 517 let mut modified = vec![];
478 let mut added = vec![]; 518 let mut added = vec![];
479 let mut removed = vec![]; 519 let mut removed = vec![];
480 let mut deleted = vec![]; 520 let mut deleted = vec![];
481 let mut clean = vec![]; 521 let mut clean = vec![];
482 let mut ignored = vec![]; 522 let mut ignored = vec![];
483 let mut unknown = vec![]; 523 let mut unknown = vec![];
484 let mut bad = vec![]; 524 let mut bad = vec![];
485 525
486 for res in results.into_iter() { 526 for (filename, dispatch) in results.into_iter() {
487 let (filename, dispatch) = res?;
488 match dispatch { 527 match dispatch {
489 Dispatch::Unknown => unknown.push(filename), 528 Dispatch::Unknown => unknown.push(filename),
490 Dispatch::Unsure => lookup.push(filename), 529 Dispatch::Unsure => lookup.push(filename),
491 Dispatch::Modified => modified.push(filename), 530 Dispatch::Modified => modified.push(filename),
492 Dispatch::Added => added.push(filename), 531 Dispatch::Added => added.push(filename),
498 Dispatch::Bad(reason) => bad.push((filename, reason)), 537 Dispatch::Bad(reason) => bad.push((filename, reason)),
499 Dispatch::Directory { .. } => {} 538 Dispatch::Directory { .. } => {}
500 } 539 }
501 } 540 }
502 541
503 Ok(( 542 (
504 lookup, 543 lookup,
505 DirstateStatus { 544 DirstateStatus {
506 modified, 545 modified,
507 added, 546 added,
508 removed, 547 removed,
510 clean, 549 clean,
511 ignored, 550 ignored,
512 unknown, 551 unknown,
513 bad, 552 bad,
514 }, 553 },
515 )) 554 )
516 } 555 }
517 556
557 pub enum StatusError {
558 IO(std::io::Error),
559 Path(HgPathError),
560 Pattern(PatternError),
561 }
562
563 pub type StatusResult<T> = Result<T, StatusError>;
564
565 impl From<PatternError> for StatusError {
566 fn from(e: PatternError) -> Self {
567 StatusError::Pattern(e)
568 }
569 }
570 impl From<HgPathError> for StatusError {
571 fn from(e: HgPathError) -> Self {
572 StatusError::Path(e)
573 }
574 }
575 impl From<std::io::Error> for StatusError {
576 fn from(e: std::io::Error) -> Self {
577 StatusError::IO(e)
578 }
579 }
580
581 impl ToString for StatusError {
582 fn to_string(&self) -> String {
583 match self {
584 StatusError::IO(e) => e.to_string(),
585 StatusError::Path(e) => e.to_string(),
586 StatusError::Pattern(e) => e.to_string(),
587 }
588 }
589 }
590
591 /// Get the status of files in the working directory.
592 ///
593 /// This is the current entry-point for `hg-core` and is realistically unusable
594 /// outside of a Python context because its arguments need to provide a lot of
595 /// information that will not be necessary in the future.
518 pub fn status<'a: 'c, 'b: 'c, 'c>( 596 pub fn status<'a: 'c, 'b: 'c, 'c>(
519 dmap: &'a DirstateMap, 597 dmap: &'a DirstateMap,
520 matcher: &'b impl Matcher, 598 matcher: &'b (impl Matcher + Sync),
521 root_dir: impl AsRef<Path> + Sync + Send + Copy, 599 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
600 ignore_files: &[impl AsRef<Path> + 'c],
522 options: StatusOptions, 601 options: StatusOptions,
523 ) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> { 602 ) -> StatusResult<(
603 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
604 Vec<PatternFileWarning>,
605 )> {
606 let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?;
607
608 // Is the path or one of its ancestors ignored?
609 let dir_ignore_fn = |dir: &_| {
610 if ignore_fn(dir) {
611 true
612 } else {
613 for p in find_dirs(dir) {
614 if ignore_fn(p) {
615 return true;
616 }
617 }
618 false
619 }
620 };
621
524 let files = matcher.file_set(); 622 let files = matcher.file_set();
525 let mut results = vec![]; 623
526 if let Some(files) = files { 624 // Step 1: check the files explicitly mentioned by the user
527 results.par_extend(walk_explicit(&files, &dmap, root_dir, options)); 625 let explicit = walk_explicit(files, &dmap, root_dir, options);
626 let (work, mut results): (Vec<_>, FastHashMap<_, _>) = explicit
627 .filter_map(Result::ok)
628 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
629 .partition(|(_, dispatch)| match dispatch {
630 Dispatch::Directory { .. } => true,
631 _ => false,
632 });
633
634 // Step 2: recursively check the working directory for changes if needed
635 for (dir, dispatch) in work {
636 match dispatch {
637 Dispatch::Directory { was_file } => {
638 if was_file {
639 results.insert(dir.to_owned(), Dispatch::Removed);
640 }
641 if options.list_ignored
642 || options.list_unknown && !dir_ignore_fn(&dir)
643 {
644 results = traverse(
645 matcher,
646 root_dir,
647 &dmap,
648 &dir,
649 results,
650 &ignore_fn,
651 &dir_ignore_fn,
652 options,
653 )?;
654 }
655 }
656 _ => unreachable!("There can only be directories in `work`"),
657 }
528 } 658 }
529 659
530 if !matcher.is_exact() { 660 if !matcher.is_exact() {
531 let stat_results = stat_dmap_entries(&dmap, root_dir, options); 661 // Step 3: Check the remaining files from the dmap.
532 results.par_extend(stat_results); 662 // If a dmap file is not in results yet, it was either
533 } 663 // a) not matched b) ignored, c) missing, or d) under a
534 664 // symlink directory.
535 build_response(results) 665
536 } 666 if options.list_unknown {
667 let to_visit: Box<dyn Iterator<Item = (&HgPath, &DirstateEntry)>> =
668 if results.is_empty() && matcher.matches_everything() {
669 Box::new(dmap.iter().map(|(f, e)| (f.deref(), e)))
670 } else {
671 Box::new(dmap.iter().filter_map(|(f, e)| {
672 if !results.contains_key(f.deref())
673 && matcher.matches(f)
674 {
675 Some((f.deref(), e))
676 } else {
677 None
678 }
679 }))
680 };
681 let mut to_visit: Vec<_> = to_visit.collect();
682 to_visit.sort_by(|a, b| a.0.cmp(&b.0));
683
684 // We walked all dirs under the roots that weren't ignored, and
685 // everything that matched was stat'ed and is already in results.
686 // The rest must thus be ignored or under a symlink.
687 let mut path_auditor = PathAuditor::new(root_dir);
688
689 for (ref filename, entry) in to_visit {
690 // Report ignored items in the dmap as long as they are not
691 // under a symlink directory.
692 if path_auditor.check(filename) {
693 // TODO normalize for case-insensitive filesystems
694 let buf = hg_path_to_path_buf(filename)?;
695 results.insert(
696 Cow::Borrowed(filename),
697 match root_dir.as_ref().join(&buf).symlink_metadata() {
698 // File was just ignored, no links, and exists
699 Ok(meta) => {
700 let metadata = HgMetadata::from_metadata(meta);
701 dispatch_found(
702 filename,
703 *entry,
704 metadata,
705 &dmap.copy_map,
706 options,
707 )
708 }
709 // File doesn't exist
710 Err(_) => dispatch_missing(entry.state),
711 },
712 );
713 } else {
714 // It's either missing or under a symlink directory which
715 // we, in this case, report as missing.
716 results.insert(
717 Cow::Borrowed(filename),
718 dispatch_missing(entry.state),
719 );
720 }
721 }
722 } else {
723 // We may not have walked the full directory tree above, so stat
724 // and check everything we missed.
725 let stat_results = stat_dmap_entries(&dmap, root_dir, options);
726 results.par_extend(stat_results.flatten().map(
727 |(filename, dispatch)| (Cow::Borrowed(filename), dispatch),
728 ));
729 }
730 }
731
732 let results = results.into_iter().filter_map(|(filename, dispatch)| {
733 match dispatch {
734 Dispatch::Bad(_) => return Some((filename, dispatch)),
735 _ => {}
736 };
737 // TODO do this in //, not at the end
738 if !dmap.contains_key(filename.deref()) {
739 if (options.list_ignored || matcher.exact_match(&filename))
740 && dir_ignore_fn(&filename)
741 {
742 if options.list_ignored {
743 return Some((filename.to_owned(), Dispatch::Ignored));
744 }
745 } else {
746 if !ignore_fn(&filename) {
747 return Some((filename.to_owned(), Dispatch::Unknown));
748 }
749 }
750 return None;
751 }
752 Some((filename, dispatch))
753 });
754
755 Ok((build_response(results), warnings))
756 }