comparison rust/hg-core/src/dirstate/status.rs @ 43271:99394e6c5d12

rust-dirstate-status: add first Rust implementation of `dirstate.status` Note: This patch also added the rayon crate as a Cargo dependency. It will help us immensely in making Rust code parallel and easy to maintain. It is a stable, well-known, and supported crate maintained by people on the Rust team. The current `dirstate.status` method has grown over the years through bug reports and new features to the point where it got too big and too complex. This series does not yet improve the logic, but adds a Rust fast-path to speed up certain cases. Tested on mozilla-try-2019-02-18 with zstd compression: - `hg diff` on an empty working copy: - c: 1.64(+-)0.04s - rust+c before this change: 2.84(+-)0.1s - rust+c: 849(+-)40ms - `hg commit` when creating a file: - c: 5.960s - rust+c before this change: 5.828s - rust+c: 4.668s - `hg commit` when updating a file: - c: 4.866s - rust+c before this change: 4.371s - rust+c: 3.855s - `hg status -mard` - c: 1.82(+-)0.04s - rust+c before this change: 2.64(+-)0.1s - rust+c: 896(+-)30ms The numbers are clear: the current Rust `dirstatemap` implementation is super slow; its performance needs to be addressed. This will be done in a future series, immediately after this one, with the goal of getting Rust to be at least up to the speed of the Python + C implementation in all cases before the 5.2 freeze. At worst, we gate dirstatemap to only be used in those cases. Cases where the fast-path is not executed: - for commands that need ignore support (`status`, for example) - if subrepos are found (should not be hard to add, but winter is coming) - any other matcher than an `alwaysmatcher`, like patterns, etc. - with extensions like `sparse` and `fsmonitor` The next step after this is to rethink the logic to be closer to Jane Street's Valentin Gatien-Baron's Rust fast-path which does a lot less work when possible. Differential Revision: https://phab.mercurial-scm.org/D7058
author Raphaël Gomès <rgomes@octobus.net>
date Fri, 11 Oct 2019 13:39:57 +0200
parents
children ab9b0a20b9e6
comparison
equal deleted inserted replaced
43270:6a8c166a93a6 43271:99394e6c5d12
1 // status.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! Rust implementation of dirstate.status (dirstate.py).
9 //! It is currently missing a lot of functionality compared to the Python one
10 //! and will only be triggered in narrow cases.
11
12 use crate::utils::files::HgMetadata;
13 use crate::utils::hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf};
14 use crate::{DirstateEntry, DirstateMap, EntryState};
15 use rayon::prelude::*;
16 use std::collections::HashMap;
17 use std::fs::Metadata;
18 use std::path::Path;
19
20 /// Get stat data about the files explicitly specified by match.
21 /// TODO subrepos
22 fn walk_explicit(
23 files: &[impl AsRef<HgPath> + Sync],
24 dmap: &DirstateMap,
25 root_dir: impl AsRef<Path> + Sync,
26 ) -> std::io::Result<HashMap<HgPathBuf, Option<HgMetadata>>> {
27 let mut results = HashMap::new();
28
29 // A tuple of the normalized filename and the `Result` of the call to
30 // `symlink_metadata` for separate handling.
31 type WalkTuple<'a> = (&'a HgPath, std::io::Result<Metadata>);
32
33 let stats_res: std::io::Result<Vec<WalkTuple>> = files
34 .par_iter()
35 .map(|filename| {
36 // TODO normalization
37 let normalized = filename.as_ref();
38
39 let target_filename =
40 root_dir.as_ref().join(hg_path_to_path_buf(normalized)?);
41
42 Ok((normalized, target_filename.symlink_metadata()))
43 })
44 .collect();
45
46 for res in stats_res? {
47 match res {
48 (normalized, Ok(stat)) => {
49 if stat.is_file() {
50 results.insert(
51 normalized.to_owned(),
52 Some(HgMetadata::from_metadata(stat)),
53 );
54 } else {
55 if dmap.contains_key(normalized) {
56 results.insert(normalized.to_owned(), None);
57 }
58 }
59 }
60 (normalized, Err(_)) => {
61 if dmap.contains_key(normalized) {
62 results.insert(normalized.to_owned(), None);
63 }
64 }
65 };
66 }
67
68 Ok(results)
69 }
70
71 // Stat all entries in the `DirstateMap` and return their new metadata.
72 pub fn stat_dmap_entries(
73 dmap: &DirstateMap,
74 results: &HashMap<HgPathBuf, Option<HgMetadata>>,
75 root_dir: impl AsRef<Path> + Sync,
76 ) -> std::io::Result<Vec<(HgPathBuf, Option<HgMetadata>)>> {
77 dmap.par_iter()
78 .filter_map(
79 // Getting file metadata is costly, so we don't do it if the
80 // file is already present in the results, hence `filter_map`
81 |(filename, _)| -> Option<
82 std::io::Result<(HgPathBuf, Option<HgMetadata>)>
83 > {
84 if results.contains_key(filename) {
85 return None;
86 }
87 let meta = match hg_path_to_path_buf(filename) {
88 Ok(p) => root_dir.as_ref().join(p).symlink_metadata(),
89 Err(e) => return Some(Err(e.into())),
90 };
91
92 Some(match meta {
93 Ok(ref m)
94 if !(m.file_type().is_file()
95 || m.file_type().is_symlink()) =>
96 {
97 Ok((filename.to_owned(), None))
98 }
99 Ok(m) => Ok((
100 filename.to_owned(),
101 Some(HgMetadata::from_metadata(m)),
102 )),
103 Err(ref e)
104 if e.kind() == std::io::ErrorKind::NotFound
105 || e.raw_os_error() == Some(20) =>
106 {
107 // Rust does not yet have an `ErrorKind` for
108 // `NotADirectory` (errno 20)
109 // It happens if the dirstate contains `foo/bar` and
110 // foo is not a directory
111 Ok((filename.to_owned(), None))
112 }
113 Err(e) => Err(e),
114 })
115 },
116 )
117 .collect()
118 }
119
120 pub struct StatusResult {
121 pub modified: Vec<HgPathBuf>,
122 pub added: Vec<HgPathBuf>,
123 pub removed: Vec<HgPathBuf>,
124 pub deleted: Vec<HgPathBuf>,
125 pub clean: Vec<HgPathBuf>,
126 // TODO ignored
127 // TODO unknown
128 }
129
130 fn build_response(
131 dmap: &DirstateMap,
132 list_clean: bool,
133 last_normal_time: i64,
134 check_exec: bool,
135 results: HashMap<HgPathBuf, Option<HgMetadata>>,
136 ) -> (Vec<HgPathBuf>, StatusResult) {
137 let mut lookup = vec![];
138 let mut modified = vec![];
139 let mut added = vec![];
140 let mut removed = vec![];
141 let mut deleted = vec![];
142 let mut clean = vec![];
143
144 for (filename, metadata_option) in results.into_iter() {
145 let DirstateEntry {
146 state,
147 mode,
148 mtime,
149 size,
150 } = match dmap.get(&filename) {
151 None => {
152 continue;
153 }
154 Some(e) => *e,
155 };
156
157 match metadata_option {
158 None => {
159 match state {
160 EntryState::Normal
161 | EntryState::Merged
162 | EntryState::Added => deleted.push(filename),
163 EntryState::Removed => removed.push(filename),
164 _ => {}
165 };
166 }
167 Some(HgMetadata {
168 st_mode,
169 st_size,
170 st_mtime,
171 ..
172 }) => {
173 match state {
174 EntryState::Normal => {
175 // Dates and times that are outside the 31-bit signed
176 // range are compared modulo 2^31. This should prevent
177 // it from behaving badly with very large files or
178 // corrupt dates while still having a high probability
179 // of detecting changes. (issue2608)
180 let range_mask = 0x7fffffff;
181
182 let size_changed = (size != st_size as i32)
183 && size != (st_size as i32 & range_mask);
184 let mode_changed = (mode ^ st_mode as i32) & 0o100
185 != 0o000
186 && check_exec;
187 if size >= 0
188 && (size_changed || mode_changed)
189 || size == -2 // other parent
190 || dmap.copy_map.contains_key(&filename)
191 {
192 modified.push(filename);
193 } else if mtime != st_mtime as i32
194 && mtime != (st_mtime as i32 & range_mask)
195 {
196 lookup.push(filename);
197 } else if st_mtime == last_normal_time {
198 // the file may have just been marked as normal and
199 // it may have changed in the same second without
200 // changing its size. This can happen if we quickly
201 // do multiple commits. Force lookup, so we don't
202 // miss such a racy file change.
203 lookup.push(filename);
204 } else if list_clean {
205 clean.push(filename);
206 }
207 }
208 EntryState::Merged => modified.push(filename),
209 EntryState::Added => added.push(filename),
210 EntryState::Removed => removed.push(filename),
211 EntryState::Unknown => {}
212 }
213 }
214 }
215 }
216
217 (
218 lookup,
219 StatusResult {
220 modified,
221 added,
222 removed,
223 deleted,
224 clean,
225 },
226 )
227 }
228
229 pub fn status(
230 dmap: &DirstateMap,
231 root_dir: impl AsRef<Path> + Sync + Copy,
232 files: &[impl AsRef<HgPath> + Sync],
233 list_clean: bool,
234 last_normal_time: i64,
235 check_exec: bool,
236 ) -> std::io::Result<(Vec<HgPathBuf>, StatusResult)> {
237 let mut results = walk_explicit(files, &dmap, root_dir)?;
238
239 results.extend(stat_dmap_entries(&dmap, &results, root_dir)?);
240
241 Ok(build_response(
242 &dmap,
243 list_clean,
244 last_normal_time,
245 check_exec,
246 results,
247 ))
248 }