Mercurial > hg
comparison rust/hg-core/src/dirstate/status.rs @ 43271:99394e6c5d12
rust-dirstate-status: add first Rust implementation of `dirstate.status`
Note: This patch also added the rayon crate as a Cargo dependency. It will
help us immensely in making Rust code parallel and easy to maintain. It is
a stable, well-known, and supported crate maintained by people on the Rust
team.
The current `dirstate.status` method has grown over the years through bug
reports and new features to the point where it got too big and too complex.
This series does not yet improve the logic, but adds a Rust fast-path to speed
up certain cases.
Tested on mozilla-try-2019-02-18 with zstd compression:
- `hg diff` on an empty working copy:
- c: 1.64(+-)0.04s
- rust+c before this change: 2.84(+-)0.1s
- rust+c: 849(+-)40ms
- `hg commit` when creating a file:
- c: 5.960s
- rust+c before this change: 5.828s
- rust+c: 4.668s
- `hg commit` when updating a file:
- c: 4.866s
- rust+c before this change: 4.371s
- rust+c: 3.855s
- `hg status -mard`
- c: 1.82(+-)0.04s
- rust+c before this change: 2.64(+-)0.1s
- rust+c: 896(+-)30ms
The numbers are clear: the current Rust `dirstatemap` implementation is super
slow, and its performance needs to be addressed.
This will be done in a future series, immediately after this one, with the goal
of getting Rust to be at least as fast as the Python + C implementation
in all cases before the 5.2 freeze. At worst, we gate dirstatemap to only be used
in those cases.
Cases where the fast-path is not executed:
- for commands that need ignore support (`status`, for example)
- if subrepos are found (should not be hard to add, but winter is coming)
- any other matcher than an `alwaysmatcher`, like patterns, etc.
- with extensions like `sparse` and `fsmonitor`
The next step after this is to rethink the logic to be closer to
Jane Street's Valentin Gatien-Baron's Rust fast-path which does a lot less
work when possible.
Differential Revision: https://phab.mercurial-scm.org/D7058
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Fri, 11 Oct 2019 13:39:57 +0200 |
parents | |
children | ab9b0a20b9e6 |
comparison
equal
deleted
inserted
replaced
43270:6a8c166a93a6 | 43271:99394e6c5d12 |
---|---|
1 // status.rs | |
2 // | |
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
4 // | |
5 // This software may be used and distributed according to the terms of the | |
6 // GNU General Public License version 2 or any later version. | |
7 | |
8 //! Rust implementation of dirstate.status (dirstate.py). | |
9 //! It is currently missing a lot of functionality compared to the Python one | |
10 //! and will only be triggered in narrow cases. | |
11 | |
12 use crate::utils::files::HgMetadata; | |
13 use crate::utils::hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf}; | |
14 use crate::{DirstateEntry, DirstateMap, EntryState}; | |
15 use rayon::prelude::*; | |
16 use std::collections::HashMap; | |
17 use std::fs::Metadata; | |
18 use std::path::Path; | |
19 | |
20 /// Get stat data about the files explicitly specified by match. | |
21 /// TODO subrepos | |
22 fn walk_explicit( | |
23 files: &[impl AsRef<HgPath> + Sync], | |
24 dmap: &DirstateMap, | |
25 root_dir: impl AsRef<Path> + Sync, | |
26 ) -> std::io::Result<HashMap<HgPathBuf, Option<HgMetadata>>> { | |
27 let mut results = HashMap::new(); | |
28 | |
29 // A tuple of the normalized filename and the `Result` of the call to | |
30 // `symlink_metadata` for separate handling. | |
31 type WalkTuple<'a> = (&'a HgPath, std::io::Result<Metadata>); | |
32 | |
33 let stats_res: std::io::Result<Vec<WalkTuple>> = files | |
34 .par_iter() | |
35 .map(|filename| { | |
36 // TODO normalization | |
37 let normalized = filename.as_ref(); | |
38 | |
39 let target_filename = | |
40 root_dir.as_ref().join(hg_path_to_path_buf(normalized)?); | |
41 | |
42 Ok((normalized, target_filename.symlink_metadata())) | |
43 }) | |
44 .collect(); | |
45 | |
46 for res in stats_res? { | |
47 match res { | |
48 (normalized, Ok(stat)) => { | |
49 if stat.is_file() { | |
50 results.insert( | |
51 normalized.to_owned(), | |
52 Some(HgMetadata::from_metadata(stat)), | |
53 ); | |
54 } else { | |
55 if dmap.contains_key(normalized) { | |
56 results.insert(normalized.to_owned(), None); | |
57 } | |
58 } | |
59 } | |
60 (normalized, Err(_)) => { | |
61 if dmap.contains_key(normalized) { | |
62 results.insert(normalized.to_owned(), None); | |
63 } | |
64 } | |
65 }; | |
66 } | |
67 | |
68 Ok(results) | |
69 } | |
70 | |
71 // Stat all entries in the `DirstateMap` and return their new metadata. | |
72 pub fn stat_dmap_entries( | |
73 dmap: &DirstateMap, | |
74 results: &HashMap<HgPathBuf, Option<HgMetadata>>, | |
75 root_dir: impl AsRef<Path> + Sync, | |
76 ) -> std::io::Result<Vec<(HgPathBuf, Option<HgMetadata>)>> { | |
77 dmap.par_iter() | |
78 .filter_map( | |
79 // Getting file metadata is costly, so we don't do it if the | |
80 // file is already present in the results, hence `filter_map` | |
81 |(filename, _)| -> Option< | |
82 std::io::Result<(HgPathBuf, Option<HgMetadata>)> | |
83 > { | |
84 if results.contains_key(filename) { | |
85 return None; | |
86 } | |
87 let meta = match hg_path_to_path_buf(filename) { | |
88 Ok(p) => root_dir.as_ref().join(p).symlink_metadata(), | |
89 Err(e) => return Some(Err(e.into())), | |
90 }; | |
91 | |
92 Some(match meta { | |
93 Ok(ref m) | |
94 if !(m.file_type().is_file() | |
95 || m.file_type().is_symlink()) => | |
96 { | |
97 Ok((filename.to_owned(), None)) | |
98 } | |
99 Ok(m) => Ok(( | |
100 filename.to_owned(), | |
101 Some(HgMetadata::from_metadata(m)), | |
102 )), | |
103 Err(ref e) | |
104 if e.kind() == std::io::ErrorKind::NotFound | |
105 || e.raw_os_error() == Some(20) => | |
106 { | |
107 // Rust does not yet have an `ErrorKind` for | |
108 // `NotADirectory` (errno 20) | |
109 // It happens if the dirstate contains `foo/bar` and | |
110 // foo is not a directory | |
111 Ok((filename.to_owned(), None)) | |
112 } | |
113 Err(e) => Err(e), | |
114 }) | |
115 }, | |
116 ) | |
117 .collect() | |
118 } | |
119 | |
120 pub struct StatusResult { | |
121 pub modified: Vec<HgPathBuf>, | |
122 pub added: Vec<HgPathBuf>, | |
123 pub removed: Vec<HgPathBuf>, | |
124 pub deleted: Vec<HgPathBuf>, | |
125 pub clean: Vec<HgPathBuf>, | |
126 // TODO ignored | |
127 // TODO unknown | |
128 } | |
129 | |
130 fn build_response( | |
131 dmap: &DirstateMap, | |
132 list_clean: bool, | |
133 last_normal_time: i64, | |
134 check_exec: bool, | |
135 results: HashMap<HgPathBuf, Option<HgMetadata>>, | |
136 ) -> (Vec<HgPathBuf>, StatusResult) { | |
137 let mut lookup = vec![]; | |
138 let mut modified = vec![]; | |
139 let mut added = vec![]; | |
140 let mut removed = vec![]; | |
141 let mut deleted = vec![]; | |
142 let mut clean = vec![]; | |
143 | |
144 for (filename, metadata_option) in results.into_iter() { | |
145 let DirstateEntry { | |
146 state, | |
147 mode, | |
148 mtime, | |
149 size, | |
150 } = match dmap.get(&filename) { | |
151 None => { | |
152 continue; | |
153 } | |
154 Some(e) => *e, | |
155 }; | |
156 | |
157 match metadata_option { | |
158 None => { | |
159 match state { | |
160 EntryState::Normal | |
161 | EntryState::Merged | |
162 | EntryState::Added => deleted.push(filename), | |
163 EntryState::Removed => removed.push(filename), | |
164 _ => {} | |
165 }; | |
166 } | |
167 Some(HgMetadata { | |
168 st_mode, | |
169 st_size, | |
170 st_mtime, | |
171 .. | |
172 }) => { | |
173 match state { | |
174 EntryState::Normal => { | |
175 // Dates and times that are outside the 31-bit signed | |
176 // range are compared modulo 2^31. This should prevent | |
177 // it from behaving badly with very large files or | |
178 // corrupt dates while still having a high probability | |
179 // of detecting changes. (issue2608) | |
180 let range_mask = 0x7fffffff; | |
181 | |
182 let size_changed = (size != st_size as i32) | |
183 && size != (st_size as i32 & range_mask); | |
184 let mode_changed = (mode ^ st_mode as i32) & 0o100 | |
185 != 0o000 | |
186 && check_exec; | |
187 if size >= 0 | |
188 && (size_changed || mode_changed) | |
189 || size == -2 // other parent | |
190 || dmap.copy_map.contains_key(&filename) | |
191 { | |
192 modified.push(filename); | |
193 } else if mtime != st_mtime as i32 | |
194 && mtime != (st_mtime as i32 & range_mask) | |
195 { | |
196 lookup.push(filename); | |
197 } else if st_mtime == last_normal_time { | |
198 // the file may have just been marked as normal and | |
199 // it may have changed in the same second without | |
200 // changing its size. This can happen if we quickly | |
201 // do multiple commits. Force lookup, so we don't | |
202 // miss such a racy file change. | |
203 lookup.push(filename); | |
204 } else if list_clean { | |
205 clean.push(filename); | |
206 } | |
207 } | |
208 EntryState::Merged => modified.push(filename), | |
209 EntryState::Added => added.push(filename), | |
210 EntryState::Removed => removed.push(filename), | |
211 EntryState::Unknown => {} | |
212 } | |
213 } | |
214 } | |
215 } | |
216 | |
217 ( | |
218 lookup, | |
219 StatusResult { | |
220 modified, | |
221 added, | |
222 removed, | |
223 deleted, | |
224 clean, | |
225 }, | |
226 ) | |
227 } | |
228 | |
229 pub fn status( | |
230 dmap: &DirstateMap, | |
231 root_dir: impl AsRef<Path> + Sync + Copy, | |
232 files: &[impl AsRef<HgPath> + Sync], | |
233 list_clean: bool, | |
234 last_normal_time: i64, | |
235 check_exec: bool, | |
236 ) -> std::io::Result<(Vec<HgPathBuf>, StatusResult)> { | |
237 let mut results = walk_explicit(files, &dmap, root_dir)?; | |
238 | |
239 results.extend(stat_dmap_entries(&dmap, &results, root_dir)?); | |
240 | |
241 Ok(build_response( | |
242 &dmap, | |
243 list_clean, | |
244 last_normal_time, | |
245 check_exec, | |
246 results, | |
247 )) | |
248 } |