comparison rust/hg-core/src/dirstate_tree/status.rs @ 47117:60d852ae7e7b

dirstate-tree: Paralellize the status algorithm with Rayon The `rayon` crate exposes "parallel iterators" that work like normal iterators but dispatch work on different items to an implicit global thread pool. Differential Revision: https://phab.mercurial-scm.org/D10551
author Simon Sapin <simon.sapin@octobus.net>
date Tue, 27 Apr 2021 14:20:48 +0200
parents 1b4f0f819f92
children c92e63762573
comparison
equal deleted inserted replaced
47116:04bcba539c96 47117:60d852ae7e7b
11 use crate::EntryState; 11 use crate::EntryState;
12 use crate::HgPathBuf; 12 use crate::HgPathBuf;
13 use crate::PatternFileWarning; 13 use crate::PatternFileWarning;
14 use crate::StatusError; 14 use crate::StatusError;
15 use crate::StatusOptions; 15 use crate::StatusOptions;
16 use rayon::prelude::*;
16 use std::borrow::Cow; 17 use std::borrow::Cow;
17 use std::io; 18 use std::io;
18 use std::path::Path; 19 use std::path::Path;
19 use std::path::PathBuf; 20 use std::path::PathBuf;
21 use std::sync::Mutex;
20 22
21 /// Returns the status of the working directory compared to its parent 23 /// Returns the status of the working directory compared to its parent
22 /// changeset. 24 /// changeset.
23 /// 25 ///
24 /// This algorithm is based on traversing the filesystem tree (`fs` in function 26 /// This algorithm is based on traversing the filesystem tree (`fs` in function
39 get_ignore_function(ignore_files, &root_dir)? 41 get_ignore_function(ignore_files, &root_dir)?
40 } else { 42 } else {
41 (Box::new(|&_| true), vec![]) 43 (Box::new(|&_| true), vec![])
42 }; 44 };
43 45
44 let mut common = StatusCommon { 46 let common = StatusCommon {
45 options, 47 options,
46 matcher, 48 matcher,
47 ignore_fn, 49 ignore_fn,
48 outcome: DirstateStatus::default(), 50 outcome: Mutex::new(DirstateStatus::default()),
49 }; 51 };
50 let is_at_repo_root = true; 52 let is_at_repo_root = true;
51 let hg_path = HgPath::new(""); 53 let hg_path = HgPath::new("");
52 let has_ignored_ancestor = false; 54 let has_ignored_ancestor = false;
53 common.traverse_fs_directory_and_dirstate( 55 common.traverse_fs_directory_and_dirstate(
55 &mut dmap.root, 57 &mut dmap.root,
56 hg_path, 58 hg_path,
57 &root_dir, 59 &root_dir,
58 is_at_repo_root, 60 is_at_repo_root,
59 ); 61 );
60 Ok((common.outcome, warnings)) 62 Ok((common.outcome.into_inner().unwrap(), warnings))
61 } 63 }
62 64
63 /// Bag of random things needed by various parts of the algorithm. Reduces the 65 /// Bag of random things needed by various parts of the algorithm. Reduces the
64 /// number of parameters passed to functions. 66 /// number of parameters passed to functions.
65 struct StatusCommon<'tree, 'a> { 67 struct StatusCommon<'tree, 'a> {
66 options: StatusOptions, 68 options: StatusOptions,
67 matcher: &'a (dyn Matcher + Sync), 69 matcher: &'a (dyn Matcher + Sync),
68 ignore_fn: IgnoreFnType<'a>, 70 ignore_fn: IgnoreFnType<'a>,
69 outcome: DirstateStatus<'tree>, 71 outcome: Mutex<DirstateStatus<'tree>>,
70 } 72 }
71 73
72 impl<'tree, 'a> StatusCommon<'tree, 'a> { 74 impl<'tree, 'a> StatusCommon<'tree, 'a> {
73 fn read_dir( 75 fn read_dir(
74 &mut self, 76 &self,
75 hg_path: &HgPath, 77 hg_path: &HgPath,
76 fs_path: &Path, 78 fs_path: &Path,
77 is_at_repo_root: bool, 79 is_at_repo_root: bool,
78 ) -> Result<Vec<DirEntry>, ()> { 80 ) -> Result<Vec<DirEntry>, ()> {
79 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { 81 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
80 let errno = error.raw_os_error().expect("expected real OS error"); 82 let errno = error.raw_os_error().expect("expected real OS error");
81 self.outcome 83 self.outcome
84 .lock()
85 .unwrap()
82 .bad 86 .bad
83 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) 87 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
84 }) 88 })
85 } 89 }
86 90
87 fn traverse_fs_directory_and_dirstate( 91 fn traverse_fs_directory_and_dirstate(
88 &mut self, 92 &self,
89 has_ignored_ancestor: bool, 93 has_ignored_ancestor: bool,
90 dirstate_nodes: &'tree mut ChildNodes, 94 dirstate_nodes: &'tree mut ChildNodes,
91 directory_hg_path: &'tree HgPath, 95 directory_hg_path: &'tree HgPath,
92 directory_fs_path: &Path, 96 directory_fs_path: &Path,
93 is_at_repo_root: bool, 97 is_at_repo_root: bool,
102 return; 106 return;
103 }; 107 };
104 108
105 // `merge_join_by` requires both its input iterators to be sorted: 109 // `merge_join_by` requires both its input iterators to be sorted:
106 110
111 //
107 // * `BTreeMap` iterates according to keys’ ordering by definition 112 // * `BTreeMap` iterates according to keys’ ordering by definition
108 113
109 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: 114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
110 // https://github.com/rust-lang/rust/issues/34162 115 // https://github.com/rust-lang/rust/issues/34162
111 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); 116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
112 117
113 for pair in itertools::merge_join_by( 118 itertools::merge_join_by(
114 dirstate_nodes, 119 dirstate_nodes,
115 &fs_entries, 120 &fs_entries,
116 |(full_path, _node), fs_entry| { 121 |(full_path, _node), fs_entry| {
117 full_path.base_name().cmp(&fs_entry.base_name) 122 full_path.base_name().cmp(&fs_entry.base_name)
118 }, 123 },
119 ) { 124 )
125 .par_bridge()
126 .for_each(|pair| {
120 use itertools::EitherOrBoth::*; 127 use itertools::EitherOrBoth::*;
121 match pair { 128 match pair {
122 Both((hg_path, dirstate_node), fs_entry) => { 129 Both((hg_path, dirstate_node), fs_entry) => {
123 self.traverse_fs_and_dirstate( 130 self.traverse_fs_and_dirstate(
124 fs_entry, 131 fs_entry,
135 has_ignored_ancestor, 142 has_ignored_ancestor,
136 directory_hg_path, 143 directory_hg_path,
137 fs_entry, 144 fs_entry,
138 ), 145 ),
139 } 146 }
140 } 147 })
141 } 148 }
142 149
143 fn traverse_fs_and_dirstate( 150 fn traverse_fs_and_dirstate(
144 &mut self, 151 &self,
145 fs_entry: &DirEntry, 152 fs_entry: &DirEntry,
146 hg_path: &'tree HgPath, 153 hg_path: &'tree HgPath,
147 dirstate_node: &'tree mut Node, 154 dirstate_node: &'tree mut Node,
148 has_ignored_ancestor: bool, 155 has_ignored_ancestor: bool,
149 ) { 156 ) {
158 dirstate_node.state(), 165 dirstate_node.state(),
159 ); 166 );
160 } 167 }
161 if file_type.is_dir() { 168 if file_type.is_dir() {
162 if self.options.collect_traversed_dirs { 169 if self.options.collect_traversed_dirs {
163 self.outcome.traversed.push(hg_path.into()) 170 self.outcome.lock().unwrap().traversed.push(hg_path.into())
164 } 171 }
165 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); 172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
166 let is_at_repo_root = false; 173 let is_at_repo_root = false;
167 self.traverse_fs_directory_and_dirstate( 174 self.traverse_fs_directory_and_dirstate(
168 is_ignored, 175 is_ignored,
175 if file_or_symlink && self.matcher.matches(hg_path) { 182 if file_or_symlink && self.matcher.matches(hg_path) {
176 let full_path = Cow::from(hg_path); 183 let full_path = Cow::from(hg_path);
177 if let Some(entry) = &dirstate_node.entry { 184 if let Some(entry) = &dirstate_node.entry {
178 match entry.state { 185 match entry.state {
179 EntryState::Added => { 186 EntryState::Added => {
180 self.outcome.added.push(full_path) 187 self.outcome.lock().unwrap().added.push(full_path)
181 } 188 }
182 EntryState::Removed => { 189 EntryState::Removed => self
183 self.outcome.removed.push(full_path) 190 .outcome
184 } 191 .lock()
185 EntryState::Merged => { 192 .unwrap()
186 self.outcome.modified.push(full_path) 193 .removed
187 } 194 .push(full_path),
195 EntryState::Merged => self
196 .outcome
197 .lock()
198 .unwrap()
199 .modified
200 .push(full_path),
188 EntryState::Normal => { 201 EntryState::Normal => {
189 self.handle_normal_file( 202 self.handle_normal_file(
190 full_path, 203 full_path,
191 dirstate_node, 204 dirstate_node,
192 entry, 205 entry,
217 } 230 }
218 231
219 /// A file with `EntryState::Normal` in the dirstate was found in the 232 /// A file with `EntryState::Normal` in the dirstate was found in the
220 /// filesystem 233 /// filesystem
221 fn handle_normal_file( 234 fn handle_normal_file(
222 &mut self, 235 &self,
223 full_path: Cow<'tree, HgPath>, 236 full_path: Cow<'tree, HgPath>,
224 dirstate_node: &Node, 237 dirstate_node: &Node,
225 entry: &crate::DirstateEntry, 238 entry: &crate::DirstateEntry,
226 fs_entry: &DirEntry, 239 fs_entry: &DirEntry,
227 ) { 240 ) {
241 && size_changed 254 && size_changed
242 && fs_entry.metadata.file_type().is_symlink() 255 && fs_entry.metadata.file_type().is_symlink()
243 { 256 {
244 // issue6456: Size returned may be longer due to encryption 257 // issue6456: Size returned may be longer due to encryption
245 // on EXT-4 fscrypt. TODO maybe only do it on EXT4? 258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
246 self.outcome.unsure.push(full_path) 259 self.outcome.lock().unwrap().unsure.push(full_path)
247 } else if dirstate_node.copy_source.is_some() 260 } else if dirstate_node.copy_source.is_some()
248 || entry.is_from_other_parent() 261 || entry.is_from_other_parent()
249 || (entry.size >= 0 && (size_changed || mode_changed())) 262 || (entry.size >= 0 && (size_changed || mode_changed()))
250 { 263 {
251 self.outcome.modified.push(full_path) 264 self.outcome.lock().unwrap().modified.push(full_path)
252 } else { 265 } else {
253 let mtime = mtime_seconds(&fs_entry.metadata); 266 let mtime = mtime_seconds(&fs_entry.metadata);
254 if truncate_i64(mtime) != entry.mtime 267 if truncate_i64(mtime) != entry.mtime
255 || mtime == self.options.last_normal_time 268 || mtime == self.options.last_normal_time
256 { 269 {
257 self.outcome.unsure.push(full_path) 270 self.outcome.lock().unwrap().unsure.push(full_path)
258 } else if self.options.list_clean { 271 } else if self.options.list_clean {
259 self.outcome.clean.push(full_path) 272 self.outcome.lock().unwrap().clean.push(full_path)
260 } 273 }
261 } 274 }
262 } 275 }
263 276
264 /// A node in the dirstate tree has no corresponding filesystem entry 277 /// A node in the dirstate tree has no corresponding filesystem entry
265 fn traverse_dirstate_only( 278 fn traverse_dirstate_only(
266 &mut self, 279 &self,
267 hg_path: &'tree HgPath, 280 hg_path: &'tree HgPath,
268 dirstate_node: &'tree mut Node, 281 dirstate_node: &'tree mut Node,
269 ) { 282 ) {
270 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); 283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
271 for (child_hg_path, child_node) in &mut dirstate_node.children { 284 dirstate_node.children.par_iter_mut().for_each(
272 self.traverse_dirstate_only(child_hg_path.full_path(), child_node) 285 |(child_hg_path, child_node)| {
273 } 286 self.traverse_dirstate_only(
287 child_hg_path.full_path(),
288 child_node,
289 )
290 },
291 )
274 } 292 }
275 293
276 /// A node in the dirstate tree has no corresponding *file* on the 294 /// A node in the dirstate tree has no corresponding *file* on the
277 /// filesystem 295 /// filesystem
278 /// 296 ///
279 /// Does nothing on a "directory" node 297 /// Does nothing on a "directory" node
280 fn mark_removed_or_deleted_if_file( 298 fn mark_removed_or_deleted_if_file(
281 &mut self, 299 &self,
282 hg_path: &'tree HgPath, 300 hg_path: &'tree HgPath,
283 dirstate_node_state: Option<EntryState>, 301 dirstate_node_state: Option<EntryState>,
284 ) { 302 ) {
285 if let Some(state) = dirstate_node_state { 303 if let Some(state) = dirstate_node_state {
286 if self.matcher.matches(hg_path) { 304 if self.matcher.matches(hg_path) {
287 if let EntryState::Removed = state { 305 if let EntryState::Removed = state {
288 self.outcome.removed.push(hg_path.into()) 306 self.outcome.lock().unwrap().removed.push(hg_path.into())
289 } else { 307 } else {
290 self.outcome.deleted.push(hg_path.into()) 308 self.outcome.lock().unwrap().deleted.push(hg_path.into())
291 } 309 }
292 } 310 }
293 } 311 }
294 } 312 }
295 313
296 /// Something in the filesystem has no corresponding dirstate node 314 /// Something in the filesystem has no corresponding dirstate node
297 fn traverse_fs_only( 315 fn traverse_fs_only(
298 &mut self, 316 &self,
299 has_ignored_ancestor: bool, 317 has_ignored_ancestor: bool,
300 directory_hg_path: &HgPath, 318 directory_hg_path: &HgPath,
301 fs_entry: &DirEntry, 319 fs_entry: &DirEntry,
302 ) { 320 ) {
303 let hg_path = directory_hg_path.join(&fs_entry.base_name); 321 let hg_path = directory_hg_path.join(&fs_entry.base_name);
319 if let Ok(children_fs_entries) = self.read_dir( 337 if let Ok(children_fs_entries) = self.read_dir(
320 &hg_path, 338 &hg_path,
321 &fs_entry.full_path, 339 &fs_entry.full_path,
322 is_at_repo_root, 340 is_at_repo_root,
323 ) { 341 ) {
324 for child_fs_entry in children_fs_entries { 342 children_fs_entries.par_iter().for_each(|child_fs_entry| {
325 self.traverse_fs_only( 343 self.traverse_fs_only(
326 is_ignored, 344 is_ignored,
327 &hg_path, 345 &hg_path,
328 &child_fs_entry, 346 child_fs_entry,
329 ) 347 )
330 } 348 })
331 } 349 }
332 } 350 }
333 if self.options.collect_traversed_dirs { 351 if self.options.collect_traversed_dirs {
334 self.outcome.traversed.push(hg_path.into()) 352 self.outcome.lock().unwrap().traversed.push(hg_path.into())
335 } 353 }
336 } else if file_or_symlink && self.matcher.matches(&hg_path) { 354 } else if file_or_symlink && self.matcher.matches(&hg_path) {
337 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into()) 355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
338 } 356 }
339 } 357 }
340 358
341 fn mark_unknown_or_ignored( 359 fn mark_unknown_or_ignored(
342 &mut self, 360 &self,
343 has_ignored_ancestor: bool, 361 has_ignored_ancestor: bool,
344 hg_path: Cow<'tree, HgPath>, 362 hg_path: Cow<'tree, HgPath>,
345 ) { 363 ) {
346 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); 364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
347 if is_ignored { 365 if is_ignored {
348 if self.options.list_ignored { 366 if self.options.list_ignored {
349 self.outcome.ignored.push(hg_path) 367 self.outcome.lock().unwrap().ignored.push(hg_path)
350 } 368 }
351 } else { 369 } else {
352 if self.options.list_unknown { 370 if self.options.list_unknown {
353 self.outcome.unknown.push(hg_path) 371 self.outcome.lock().unwrap().unknown.push(hg_path)
354 } 372 }
355 } 373 }
356 } 374 }
357 } 375 }
358 376