Mercurial > hg
comparison rust/hg-core/src/dirstate/dirstate_tree/iter.rs @ 45562:b51167d70f5a
rust: add `dirstate_tree` module
Mercurial needs to represent the filesystem hierarchy on which it operates, for
example in the dirstate. Its current on-disk representation is an unsorted, flat
structure that gets transformed in the current Rust code into a `HashMap`.
This loses the hierarchical information of the dirstate, leading to some
unfortunate performance and algorithmic compromises.
This module adds an implementation of a radix tree that is specialized for
representing the dirstate: its unit is the path component. I have made no
efforts to optimize either its memory footprint or its insertion speed: they're
pretty bad for now.
Following will be a few patches that modify the dirstate.status logic to use
that new hierarchical information, fixing issue 6335 in the same swing.
Differential Revision: https://phab.mercurial-scm.org/D9085
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Fri, 25 Sep 2020 17:51:34 +0200 |
parents | |
children | c35db907363d |
comparison
equal
deleted
inserted
replaced
45558:80bf7b1ada15 | 45562:b51167d70f5a |
---|---|
1 // iter.rs | |
2 // | |
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net> | |
4 // | |
5 // This software may be used and distributed according to the terms of the | |
6 // GNU General Public License version 2 or any later version. | |
7 | |
8 use super::node::{Node, NodeKind}; | |
9 use super::tree::Tree; | |
10 use crate::dirstate::dirstate_tree::node::Directory; | |
11 use crate::dirstate::status::Dispatch; | |
12 use crate::utils::hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf}; | |
13 use crate::DirstateEntry; | |
14 use std::borrow::Cow; | |
15 use std::collections::VecDeque; | |
16 use std::iter::{FromIterator, FusedIterator}; | |
17 use std::path::PathBuf; | |
18 | |
19 impl FromIterator<(HgPathBuf, DirstateEntry)> for Tree { | |
20 fn from_iter<T: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(iter: T) -> Self { | |
21 let mut tree = Self::new(); | |
22 for (path, entry) in iter { | |
23 tree.insert(path, entry); | |
24 } | |
25 tree | |
26 } | |
27 } | |
28 | |
29 /// Iterator of all entries in the dirstate tree. | |
30 /// | |
31 /// It has no particular ordering. | |
32 pub struct Iter<'a> { | |
33 to_visit: VecDeque<(Cow<'a, [u8]>, &'a Node)>, | |
34 } | |
35 | |
36 impl<'a> Iter<'a> { | |
37 pub fn new(node: &'a Node) -> Iter<'a> { | |
38 let mut to_visit = VecDeque::new(); | |
39 to_visit.push_back((Cow::Borrowed(&b""[..]), node)); | |
40 Self { to_visit } | |
41 } | |
42 } | |
43 | |
44 impl<'a> Iterator for Iter<'a> { | |
45 type Item = (HgPathBuf, DirstateEntry); | |
46 | |
47 fn next(&mut self) -> Option<Self::Item> { | |
48 while let Some((base_path, node)) = self.to_visit.pop_front() { | |
49 match &node.kind { | |
50 NodeKind::Directory(dir) => { | |
51 add_children_to_visit(&mut self.to_visit, &base_path, &dir); | |
52 if let Some(file) = &dir.was_file { | |
53 return Some((HgPathBuf::from_bytes(&base_path), file.entry)); | |
54 } | |
55 } | |
56 NodeKind::File(file) => { | |
57 if let Some(dir) = &file.was_directory { | |
58 add_children_to_visit(&mut self.to_visit, &base_path, &dir); | |
59 } | |
60 return Some((HgPathBuf::from_bytes(&base_path), file.entry)); | |
61 } | |
62 } | |
63 } | |
64 None | |
65 } | |
66 } | |
67 | |
68 impl<'a> FusedIterator for Iter<'a> {} | |
69 | |
70 /// Iterator of all entries in the dirstate tree, with a special filesystem | |
71 /// handling for the directories containing said entries. | |
72 /// | |
73 /// It checks every directory on-disk to see if it has become a symlink, to | |
74 /// prevent a potential security issue. | |
75 /// Using this information, it may dispatch `status` information early: it | |
76 /// returns canonical paths along with `Shortcut`s, which are either a | |
77 /// `DirstateEntry` or a `Dispatch`, if the fate of said path has already been | |
78 /// determined. | |
79 /// | |
80 /// Like `Iter`, it has no particular ordering. | |
81 pub struct FsIter<'a> { | |
82 root_dir: PathBuf, | |
83 to_visit: VecDeque<(Cow<'a, [u8]>, &'a Node)>, | |
84 shortcuts: VecDeque<(HgPathBuf, StatusShortcut)>, | |
85 } | |
86 | |
87 impl<'a> FsIter<'a> { | |
88 pub fn new(node: &'a Node, root_dir: PathBuf) -> FsIter<'a> { | |
89 let mut to_visit = VecDeque::new(); | |
90 to_visit.push_back((Cow::Borrowed(&b""[..]), node)); | |
91 Self { | |
92 root_dir, | |
93 to_visit, | |
94 shortcuts: Default::default(), | |
95 } | |
96 } | |
97 | |
98 /// Mercurial tracks symlinks but *not* what they point to. | |
99 /// If a directory is moved and symlinked: | |
100 /// | |
101 /// ```bash | |
102 /// $ mkdir foo | |
103 /// $ touch foo/a | |
104 /// $ # commit... | |
105 /// $ mv foo bar | |
106 /// $ ln -s bar foo | |
107 /// ``` | |
108 /// We need to dispatch the new symlink as `Unknown` and all the | |
109 /// descendents of the directory it replace as `Deleted`. | |
110 fn dispatch_symlinked_directory(&mut self, path: impl AsRef<HgPath>, node: &Node) { | |
111 let path = path.as_ref(); | |
112 self.shortcuts | |
113 .push_back((path.to_owned(), StatusShortcut::Dispatch(Dispatch::Unknown))); | |
114 for (file, _) in node.iter() { | |
115 self.shortcuts.push_back(( | |
116 path.join(&file), | |
117 StatusShortcut::Dispatch(Dispatch::Deleted), | |
118 )); | |
119 } | |
120 } | |
121 | |
122 /// Returns `true` if the canonical `path` of a directory corresponds to a | |
123 /// symlink on disk. It means it was moved and symlinked after the last | |
124 /// dirstate update. | |
125 /// | |
126 /// # Special cases | |
127 /// | |
128 /// Returns `false` for the repository root. | |
129 /// Returns `false` on io error, error handling is outside of the iterator. | |
130 fn directory_became_symlink(&mut self, path: &HgPath) -> bool { | |
131 if path.is_empty() { | |
132 return false; | |
133 } | |
134 let filename_as_path = match hg_path_to_path_buf(&path) { | |
135 Ok(p) => p, | |
136 _ => return false, | |
137 }; | |
138 let meta = self.root_dir.join(filename_as_path).symlink_metadata(); | |
139 match meta { | |
140 Ok(ref m) if m.file_type().is_symlink() => true, | |
141 _ => return false, | |
142 } | |
143 } | |
144 } | |
145 | |
146 /// Returned by `FsIter`, since the `Dispatch` of any given entry may already | |
147 /// be determined during the iteration. This is necessary for performance | |
148 /// reasons, since hierarchical information is needed to `Dispatch` an entire | |
149 /// subtree efficiently. | |
150 #[derive(Debug, Copy, Clone)] | |
151 pub enum StatusShortcut { | |
152 /// A entry in the dirstate for further inspection | |
153 Entry(DirstateEntry), | |
154 /// The result of the status of the corresponding file | |
155 Dispatch(Dispatch), | |
156 } | |
157 | |
158 impl<'a> Iterator for FsIter<'a> { | |
159 type Item = (HgPathBuf, StatusShortcut); | |
160 | |
161 fn next(&mut self) -> Option<Self::Item> { | |
162 // If any paths have already been `Dispatch`-ed, return them | |
163 while let Some(res) = self.shortcuts.pop_front() { | |
164 return Some(res); | |
165 } | |
166 | |
167 while let Some((base_path, node)) = self.to_visit.pop_front() { | |
168 match &node.kind { | |
169 NodeKind::Directory(dir) => { | |
170 let canonical_path = HgPath::new(&base_path); | |
171 if self.directory_became_symlink(canonical_path) { | |
172 // Potential security issue, don't do a normal | |
173 // traversal, force the results. | |
174 self.dispatch_symlinked_directory(canonical_path, &node); | |
175 continue; | |
176 } | |
177 add_children_to_visit(&mut self.to_visit, &base_path, &dir); | |
178 if let Some(file) = &dir.was_file { | |
179 return Some(( | |
180 HgPathBuf::from_bytes(&base_path), | |
181 StatusShortcut::Entry(file.entry), | |
182 )); | |
183 } | |
184 } | |
185 NodeKind::File(file) => { | |
186 if let Some(dir) = &file.was_directory { | |
187 add_children_to_visit(&mut self.to_visit, &base_path, &dir); | |
188 } | |
189 return Some(( | |
190 HgPathBuf::from_bytes(&base_path), | |
191 StatusShortcut::Entry(file.entry), | |
192 )); | |
193 } | |
194 } | |
195 } | |
196 | |
197 None | |
198 } | |
199 } | |
200 | |
201 impl<'a> FusedIterator for FsIter<'a> {} | |
202 | |
203 fn join_path<'a, 'b>(path: &'a [u8], other: &'b [u8]) -> Cow<'b, [u8]> { | |
204 if path.is_empty() { | |
205 other.into() | |
206 } else { | |
207 [path, &b"/"[..], other].concat().into() | |
208 } | |
209 } | |
210 | |
211 /// Adds all children of a given directory `dir` to the visit queue `to_visit` | |
212 /// prefixed by a `base_path`. | |
213 fn add_children_to_visit<'a>( | |
214 to_visit: &mut VecDeque<(Cow<'a, [u8]>, &'a Node)>, | |
215 base_path: &[u8], | |
216 dir: &'a Directory, | |
217 ) { | |
218 to_visit.extend(dir.children.iter().map(|(path, child)| { | |
219 let full_path = join_path(&base_path, &path); | |
220 (Cow::from(full_path), child) | |
221 })); | |
222 } | |
223 | |
224 #[cfg(test)] | |
225 mod tests { | |
226 use super::*; | |
227 use crate::utils::hg_path::HgPath; | |
228 use crate::{EntryState, FastHashMap}; | |
229 use std::collections::HashSet; | |
230 | |
231 #[test] | |
232 fn test_iteration() { | |
233 let mut tree = Tree::new(); | |
234 | |
235 assert_eq!( | |
236 tree.insert( | |
237 HgPathBuf::from_bytes(b"foo/bar"), | |
238 DirstateEntry { | |
239 state: EntryState::Merged, | |
240 mode: 41, | |
241 mtime: 42, | |
242 size: 43, | |
243 } | |
244 ), | |
245 None | |
246 ); | |
247 | |
248 assert_eq!( | |
249 tree.insert( | |
250 HgPathBuf::from_bytes(b"foo2"), | |
251 DirstateEntry { | |
252 state: EntryState::Merged, | |
253 mode: 40, | |
254 mtime: 41, | |
255 size: 42, | |
256 } | |
257 ), | |
258 None | |
259 ); | |
260 | |
261 assert_eq!( | |
262 tree.insert( | |
263 HgPathBuf::from_bytes(b"foo/baz"), | |
264 DirstateEntry { | |
265 state: EntryState::Normal, | |
266 mode: 0, | |
267 mtime: 0, | |
268 size: 0, | |
269 } | |
270 ), | |
271 None | |
272 ); | |
273 | |
274 assert_eq!( | |
275 tree.insert( | |
276 HgPathBuf::from_bytes(b"foo/bap/nested"), | |
277 DirstateEntry { | |
278 state: EntryState::Normal, | |
279 mode: 0, | |
280 mtime: 0, | |
281 size: 0, | |
282 } | |
283 ), | |
284 None | |
285 ); | |
286 | |
287 assert_eq!(tree.len(), 4); | |
288 | |
289 let results: HashSet<_> = tree.iter().map(|(c, _)| c.to_owned()).collect(); | |
290 dbg!(&results); | |
291 assert!(results.contains(HgPath::new(b"foo2"))); | |
292 assert!(results.contains(HgPath::new(b"foo/bar"))); | |
293 assert!(results.contains(HgPath::new(b"foo/baz"))); | |
294 assert!(results.contains(HgPath::new(b"foo/bap/nested"))); | |
295 | |
296 let mut iter = tree.iter(); | |
297 assert!(iter.next().is_some()); | |
298 assert!(iter.next().is_some()); | |
299 assert!(iter.next().is_some()); | |
300 assert!(iter.next().is_some()); | |
301 assert_eq!(None, iter.next()); | |
302 assert_eq!(None, iter.next()); | |
303 drop(iter); | |
304 | |
305 assert_eq!( | |
306 tree.insert( | |
307 HgPathBuf::from_bytes(b"foo/bap/nested/a"), | |
308 DirstateEntry { | |
309 state: EntryState::Normal, | |
310 mode: 0, | |
311 mtime: 0, | |
312 size: 0, | |
313 } | |
314 ), | |
315 None | |
316 ); | |
317 | |
318 let results: FastHashMap<_, _> = tree.iter().collect(); | |
319 assert!(results.contains_key(HgPath::new(b"foo2"))); | |
320 assert!(results.contains_key(HgPath::new(b"foo/bar"))); | |
321 assert!(results.contains_key(HgPath::new(b"foo/baz"))); | |
322 // Is a dir but `was_file`, so it's listed as a removed file | |
323 assert!(results.contains_key(HgPath::new(b"foo/bap/nested"))); | |
324 assert!(results.contains_key(HgPath::new(b"foo/bap/nested/a"))); | |
325 | |
326 // insert removed file (now directory) after nested file | |
327 assert_eq!( | |
328 tree.insert( | |
329 HgPathBuf::from_bytes(b"a/a"), | |
330 DirstateEntry { | |
331 state: EntryState::Normal, | |
332 mode: 0, | |
333 mtime: 0, | |
334 size: 0, | |
335 } | |
336 ), | |
337 None | |
338 ); | |
339 | |
340 // `insert` returns `None` for a directory | |
341 assert_eq!( | |
342 tree.insert( | |
343 HgPathBuf::from_bytes(b"a"), | |
344 DirstateEntry { | |
345 state: EntryState::Removed, | |
346 mode: 0, | |
347 mtime: 0, | |
348 size: 0, | |
349 } | |
350 ), | |
351 None | |
352 ); | |
353 | |
354 let results: FastHashMap<_, _> = tree.iter().collect(); | |
355 assert!(results.contains_key(HgPath::new(b"a"))); | |
356 assert!(results.contains_key(HgPath::new(b"a/a"))); | |
357 } | |
358 } |