Mercurial > hg
comparison rust/hg-core/src/dirstate_tree/status.rs @ 47117:60d852ae7e7b
dirstate-tree: Paralellize the status algorithm with Rayon
The `rayon` crate exposes "parallel iterators" that work like normal iterators
but dispatch work on different items to an implicit global thread pool.
Differential Revision: https://phab.mercurial-scm.org/D10551
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Tue, 27 Apr 2021 14:20:48 +0200 |
parents | 1b4f0f819f92 |
children | c92e63762573 |
comparison
equal
deleted
inserted
replaced
47116:04bcba539c96 | 47117:60d852ae7e7b |
---|---|
11 use crate::EntryState; | 11 use crate::EntryState; |
12 use crate::HgPathBuf; | 12 use crate::HgPathBuf; |
13 use crate::PatternFileWarning; | 13 use crate::PatternFileWarning; |
14 use crate::StatusError; | 14 use crate::StatusError; |
15 use crate::StatusOptions; | 15 use crate::StatusOptions; |
16 use rayon::prelude::*; | |
16 use std::borrow::Cow; | 17 use std::borrow::Cow; |
17 use std::io; | 18 use std::io; |
18 use std::path::Path; | 19 use std::path::Path; |
19 use std::path::PathBuf; | 20 use std::path::PathBuf; |
21 use std::sync::Mutex; | |
20 | 22 |
21 /// Returns the status of the working directory compared to its parent | 23 /// Returns the status of the working directory compared to its parent |
22 /// changeset. | 24 /// changeset. |
23 /// | 25 /// |
24 /// This algorithm is based on traversing the filesystem tree (`fs` in function | 26 /// This algorithm is based on traversing the filesystem tree (`fs` in function |
39 get_ignore_function(ignore_files, &root_dir)? | 41 get_ignore_function(ignore_files, &root_dir)? |
40 } else { | 42 } else { |
41 (Box::new(|&_| true), vec![]) | 43 (Box::new(|&_| true), vec![]) |
42 }; | 44 }; |
43 | 45 |
44 let mut common = StatusCommon { | 46 let common = StatusCommon { |
45 options, | 47 options, |
46 matcher, | 48 matcher, |
47 ignore_fn, | 49 ignore_fn, |
48 outcome: DirstateStatus::default(), | 50 outcome: Mutex::new(DirstateStatus::default()), |
49 }; | 51 }; |
50 let is_at_repo_root = true; | 52 let is_at_repo_root = true; |
51 let hg_path = HgPath::new(""); | 53 let hg_path = HgPath::new(""); |
52 let has_ignored_ancestor = false; | 54 let has_ignored_ancestor = false; |
53 common.traverse_fs_directory_and_dirstate( | 55 common.traverse_fs_directory_and_dirstate( |
55 &mut dmap.root, | 57 &mut dmap.root, |
56 hg_path, | 58 hg_path, |
57 &root_dir, | 59 &root_dir, |
58 is_at_repo_root, | 60 is_at_repo_root, |
59 ); | 61 ); |
60 Ok((common.outcome, warnings)) | 62 Ok((common.outcome.into_inner().unwrap(), warnings)) |
61 } | 63 } |
62 | 64 |
63 /// Bag of random things needed by various parts of the algorithm. Reduces the | 65 /// Bag of random things needed by various parts of the algorithm. Reduces the |
64 /// number of parameters passed to functions. | 66 /// number of parameters passed to functions. |
65 struct StatusCommon<'tree, 'a> { | 67 struct StatusCommon<'tree, 'a> { |
66 options: StatusOptions, | 68 options: StatusOptions, |
67 matcher: &'a (dyn Matcher + Sync), | 69 matcher: &'a (dyn Matcher + Sync), |
68 ignore_fn: IgnoreFnType<'a>, | 70 ignore_fn: IgnoreFnType<'a>, |
69 outcome: DirstateStatus<'tree>, | 71 outcome: Mutex<DirstateStatus<'tree>>, |
70 } | 72 } |
71 | 73 |
72 impl<'tree, 'a> StatusCommon<'tree, 'a> { | 74 impl<'tree, 'a> StatusCommon<'tree, 'a> { |
73 fn read_dir( | 75 fn read_dir( |
74 &mut self, | 76 &self, |
75 hg_path: &HgPath, | 77 hg_path: &HgPath, |
76 fs_path: &Path, | 78 fs_path: &Path, |
77 is_at_repo_root: bool, | 79 is_at_repo_root: bool, |
78 ) -> Result<Vec<DirEntry>, ()> { | 80 ) -> Result<Vec<DirEntry>, ()> { |
79 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { | 81 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { |
80 let errno = error.raw_os_error().expect("expected real OS error"); | 82 let errno = error.raw_os_error().expect("expected real OS error"); |
81 self.outcome | 83 self.outcome |
84 .lock() | |
85 .unwrap() | |
82 .bad | 86 .bad |
83 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) | 87 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) |
84 }) | 88 }) |
85 } | 89 } |
86 | 90 |
87 fn traverse_fs_directory_and_dirstate( | 91 fn traverse_fs_directory_and_dirstate( |
88 &mut self, | 92 &self, |
89 has_ignored_ancestor: bool, | 93 has_ignored_ancestor: bool, |
90 dirstate_nodes: &'tree mut ChildNodes, | 94 dirstate_nodes: &'tree mut ChildNodes, |
91 directory_hg_path: &'tree HgPath, | 95 directory_hg_path: &'tree HgPath, |
92 directory_fs_path: &Path, | 96 directory_fs_path: &Path, |
93 is_at_repo_root: bool, | 97 is_at_repo_root: bool, |
102 return; | 106 return; |
103 }; | 107 }; |
104 | 108 |
105 // `merge_join_by` requires both its input iterators to be sorted: | 109 // `merge_join_by` requires both its input iterators to be sorted: |
106 | 110 |
111 // | |
107 // * `BTreeMap` iterates according to keys’ ordering by definition | 112 // * `BTreeMap` iterates according to keys’ ordering by definition |
108 | 113 |
109 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: | 114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: |
110 // https://github.com/rust-lang/rust/issues/34162 | 115 // https://github.com/rust-lang/rust/issues/34162 |
111 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); | 116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); |
112 | 117 |
113 for pair in itertools::merge_join_by( | 118 itertools::merge_join_by( |
114 dirstate_nodes, | 119 dirstate_nodes, |
115 &fs_entries, | 120 &fs_entries, |
116 |(full_path, _node), fs_entry| { | 121 |(full_path, _node), fs_entry| { |
117 full_path.base_name().cmp(&fs_entry.base_name) | 122 full_path.base_name().cmp(&fs_entry.base_name) |
118 }, | 123 }, |
119 ) { | 124 ) |
125 .par_bridge() | |
126 .for_each(|pair| { | |
120 use itertools::EitherOrBoth::*; | 127 use itertools::EitherOrBoth::*; |
121 match pair { | 128 match pair { |
122 Both((hg_path, dirstate_node), fs_entry) => { | 129 Both((hg_path, dirstate_node), fs_entry) => { |
123 self.traverse_fs_and_dirstate( | 130 self.traverse_fs_and_dirstate( |
124 fs_entry, | 131 fs_entry, |
135 has_ignored_ancestor, | 142 has_ignored_ancestor, |
136 directory_hg_path, | 143 directory_hg_path, |
137 fs_entry, | 144 fs_entry, |
138 ), | 145 ), |
139 } | 146 } |
140 } | 147 }) |
141 } | 148 } |
142 | 149 |
143 fn traverse_fs_and_dirstate( | 150 fn traverse_fs_and_dirstate( |
144 &mut self, | 151 &self, |
145 fs_entry: &DirEntry, | 152 fs_entry: &DirEntry, |
146 hg_path: &'tree HgPath, | 153 hg_path: &'tree HgPath, |
147 dirstate_node: &'tree mut Node, | 154 dirstate_node: &'tree mut Node, |
148 has_ignored_ancestor: bool, | 155 has_ignored_ancestor: bool, |
149 ) { | 156 ) { |
158 dirstate_node.state(), | 165 dirstate_node.state(), |
159 ); | 166 ); |
160 } | 167 } |
161 if file_type.is_dir() { | 168 if file_type.is_dir() { |
162 if self.options.collect_traversed_dirs { | 169 if self.options.collect_traversed_dirs { |
163 self.outcome.traversed.push(hg_path.into()) | 170 self.outcome.lock().unwrap().traversed.push(hg_path.into()) |
164 } | 171 } |
165 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); | 172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); |
166 let is_at_repo_root = false; | 173 let is_at_repo_root = false; |
167 self.traverse_fs_directory_and_dirstate( | 174 self.traverse_fs_directory_and_dirstate( |
168 is_ignored, | 175 is_ignored, |
175 if file_or_symlink && self.matcher.matches(hg_path) { | 182 if file_or_symlink && self.matcher.matches(hg_path) { |
176 let full_path = Cow::from(hg_path); | 183 let full_path = Cow::from(hg_path); |
177 if let Some(entry) = &dirstate_node.entry { | 184 if let Some(entry) = &dirstate_node.entry { |
178 match entry.state { | 185 match entry.state { |
179 EntryState::Added => { | 186 EntryState::Added => { |
180 self.outcome.added.push(full_path) | 187 self.outcome.lock().unwrap().added.push(full_path) |
181 } | 188 } |
182 EntryState::Removed => { | 189 EntryState::Removed => self |
183 self.outcome.removed.push(full_path) | 190 .outcome |
184 } | 191 .lock() |
185 EntryState::Merged => { | 192 .unwrap() |
186 self.outcome.modified.push(full_path) | 193 .removed |
187 } | 194 .push(full_path), |
195 EntryState::Merged => self | |
196 .outcome | |
197 .lock() | |
198 .unwrap() | |
199 .modified | |
200 .push(full_path), | |
188 EntryState::Normal => { | 201 EntryState::Normal => { |
189 self.handle_normal_file( | 202 self.handle_normal_file( |
190 full_path, | 203 full_path, |
191 dirstate_node, | 204 dirstate_node, |
192 entry, | 205 entry, |
217 } | 230 } |
218 | 231 |
219 /// A file with `EntryState::Normal` in the dirstate was found in the | 232 /// A file with `EntryState::Normal` in the dirstate was found in the |
220 /// filesystem | 233 /// filesystem |
221 fn handle_normal_file( | 234 fn handle_normal_file( |
222 &mut self, | 235 &self, |
223 full_path: Cow<'tree, HgPath>, | 236 full_path: Cow<'tree, HgPath>, |
224 dirstate_node: &Node, | 237 dirstate_node: &Node, |
225 entry: &crate::DirstateEntry, | 238 entry: &crate::DirstateEntry, |
226 fs_entry: &DirEntry, | 239 fs_entry: &DirEntry, |
227 ) { | 240 ) { |
241 && size_changed | 254 && size_changed |
242 && fs_entry.metadata.file_type().is_symlink() | 255 && fs_entry.metadata.file_type().is_symlink() |
243 { | 256 { |
244 // issue6456: Size returned may be longer due to encryption | 257 // issue6456: Size returned may be longer due to encryption |
245 // on EXT-4 fscrypt. TODO maybe only do it on EXT4? | 258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4? |
246 self.outcome.unsure.push(full_path) | 259 self.outcome.lock().unwrap().unsure.push(full_path) |
247 } else if dirstate_node.copy_source.is_some() | 260 } else if dirstate_node.copy_source.is_some() |
248 || entry.is_from_other_parent() | 261 || entry.is_from_other_parent() |
249 || (entry.size >= 0 && (size_changed || mode_changed())) | 262 || (entry.size >= 0 && (size_changed || mode_changed())) |
250 { | 263 { |
251 self.outcome.modified.push(full_path) | 264 self.outcome.lock().unwrap().modified.push(full_path) |
252 } else { | 265 } else { |
253 let mtime = mtime_seconds(&fs_entry.metadata); | 266 let mtime = mtime_seconds(&fs_entry.metadata); |
254 if truncate_i64(mtime) != entry.mtime | 267 if truncate_i64(mtime) != entry.mtime |
255 || mtime == self.options.last_normal_time | 268 || mtime == self.options.last_normal_time |
256 { | 269 { |
257 self.outcome.unsure.push(full_path) | 270 self.outcome.lock().unwrap().unsure.push(full_path) |
258 } else if self.options.list_clean { | 271 } else if self.options.list_clean { |
259 self.outcome.clean.push(full_path) | 272 self.outcome.lock().unwrap().clean.push(full_path) |
260 } | 273 } |
261 } | 274 } |
262 } | 275 } |
263 | 276 |
264 /// A node in the dirstate tree has no corresponding filesystem entry | 277 /// A node in the dirstate tree has no corresponding filesystem entry |
265 fn traverse_dirstate_only( | 278 fn traverse_dirstate_only( |
266 &mut self, | 279 &self, |
267 hg_path: &'tree HgPath, | 280 hg_path: &'tree HgPath, |
268 dirstate_node: &'tree mut Node, | 281 dirstate_node: &'tree mut Node, |
269 ) { | 282 ) { |
270 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); | 283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); |
271 for (child_hg_path, child_node) in &mut dirstate_node.children { | 284 dirstate_node.children.par_iter_mut().for_each( |
272 self.traverse_dirstate_only(child_hg_path.full_path(), child_node) | 285 |(child_hg_path, child_node)| { |
273 } | 286 self.traverse_dirstate_only( |
287 child_hg_path.full_path(), | |
288 child_node, | |
289 ) | |
290 }, | |
291 ) | |
274 } | 292 } |
275 | 293 |
276 /// A node in the dirstate tree has no corresponding *file* on the | 294 /// A node in the dirstate tree has no corresponding *file* on the |
277 /// filesystem | 295 /// filesystem |
278 /// | 296 /// |
279 /// Does nothing on a "directory" node | 297 /// Does nothing on a "directory" node |
280 fn mark_removed_or_deleted_if_file( | 298 fn mark_removed_or_deleted_if_file( |
281 &mut self, | 299 &self, |
282 hg_path: &'tree HgPath, | 300 hg_path: &'tree HgPath, |
283 dirstate_node_state: Option<EntryState>, | 301 dirstate_node_state: Option<EntryState>, |
284 ) { | 302 ) { |
285 if let Some(state) = dirstate_node_state { | 303 if let Some(state) = dirstate_node_state { |
286 if self.matcher.matches(hg_path) { | 304 if self.matcher.matches(hg_path) { |
287 if let EntryState::Removed = state { | 305 if let EntryState::Removed = state { |
288 self.outcome.removed.push(hg_path.into()) | 306 self.outcome.lock().unwrap().removed.push(hg_path.into()) |
289 } else { | 307 } else { |
290 self.outcome.deleted.push(hg_path.into()) | 308 self.outcome.lock().unwrap().deleted.push(hg_path.into()) |
291 } | 309 } |
292 } | 310 } |
293 } | 311 } |
294 } | 312 } |
295 | 313 |
296 /// Something in the filesystem has no corresponding dirstate node | 314 /// Something in the filesystem has no corresponding dirstate node |
297 fn traverse_fs_only( | 315 fn traverse_fs_only( |
298 &mut self, | 316 &self, |
299 has_ignored_ancestor: bool, | 317 has_ignored_ancestor: bool, |
300 directory_hg_path: &HgPath, | 318 directory_hg_path: &HgPath, |
301 fs_entry: &DirEntry, | 319 fs_entry: &DirEntry, |
302 ) { | 320 ) { |
303 let hg_path = directory_hg_path.join(&fs_entry.base_name); | 321 let hg_path = directory_hg_path.join(&fs_entry.base_name); |
319 if let Ok(children_fs_entries) = self.read_dir( | 337 if let Ok(children_fs_entries) = self.read_dir( |
320 &hg_path, | 338 &hg_path, |
321 &fs_entry.full_path, | 339 &fs_entry.full_path, |
322 is_at_repo_root, | 340 is_at_repo_root, |
323 ) { | 341 ) { |
324 for child_fs_entry in children_fs_entries { | 342 children_fs_entries.par_iter().for_each(|child_fs_entry| { |
325 self.traverse_fs_only( | 343 self.traverse_fs_only( |
326 is_ignored, | 344 is_ignored, |
327 &hg_path, | 345 &hg_path, |
328 &child_fs_entry, | 346 child_fs_entry, |
329 ) | 347 ) |
330 } | 348 }) |
331 } | 349 } |
332 } | 350 } |
333 if self.options.collect_traversed_dirs { | 351 if self.options.collect_traversed_dirs { |
334 self.outcome.traversed.push(hg_path.into()) | 352 self.outcome.lock().unwrap().traversed.push(hg_path.into()) |
335 } | 353 } |
336 } else if file_or_symlink && self.matcher.matches(&hg_path) { | 354 } else if file_or_symlink && self.matcher.matches(&hg_path) { |
337 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into()) | 355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into()) |
338 } | 356 } |
339 } | 357 } |
340 | 358 |
341 fn mark_unknown_or_ignored( | 359 fn mark_unknown_or_ignored( |
342 &mut self, | 360 &self, |
343 has_ignored_ancestor: bool, | 361 has_ignored_ancestor: bool, |
344 hg_path: Cow<'tree, HgPath>, | 362 hg_path: Cow<'tree, HgPath>, |
345 ) { | 363 ) { |
346 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); | 364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); |
347 if is_ignored { | 365 if is_ignored { |
348 if self.options.list_ignored { | 366 if self.options.list_ignored { |
349 self.outcome.ignored.push(hg_path) | 367 self.outcome.lock().unwrap().ignored.push(hg_path) |
350 } | 368 } |
351 } else { | 369 } else { |
352 if self.options.list_unknown { | 370 if self.options.list_unknown { |
353 self.outcome.unknown.push(hg_path) | 371 self.outcome.lock().unwrap().unknown.push(hg_path) |
354 } | 372 } |
355 } | 373 } |
356 } | 374 } |
357 } | 375 } |
358 | 376 |