Mercurial > hg
comparison rust/hg-core/src/dirstate/dirstate_tree/node.rs @ 45562:b51167d70f5a
rust: add `dirstate_tree` module
Mercurial needs to represent the filesystem hierarchy on which it operates, for
example in the dirstate. Its current on-disk representation is an unsorted, flat
structure that gets transformed in the current Rust code into a `HashMap`.
This loses the hierarchical information of the dirstate, leading to some
unfortunate performance and algorithmic compromises.
This module adds an implementation of a radix tree that is specialized for
representing the dirstate: its unit is the path component. I have made no
efforts to optimize either its memory footprint or its insertion speed: they're
pretty bad for now.
Following will be a few patches that modify the dirstate.status logic to use
that new hierarchical information, fixing issue 6335 in the same swing.
Differential Revision: https://phab.mercurial-scm.org/D9085
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Fri, 25 Sep 2020 17:51:34 +0200 |
parents | |
children | c35db907363d |
comparison
equal
deleted
inserted
replaced
45558:80bf7b1ada15 | 45562:b51167d70f5a |
---|---|
1 // node.rs | |
2 // | |
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net> | |
4 // | |
5 // This software may be used and distributed according to the terms of the | |
6 // GNU General Public License version 2 or any later version. | |
7 | |
8 use super::iter::Iter; | |
9 use crate::utils::hg_path::HgPathBuf; | |
10 use crate::{DirstateEntry, EntryState, FastHashMap}; | |
11 | |
12 /// Represents a filesystem directory in the dirstate tree | |
13 #[derive(Debug, Default, Clone, PartialEq)] | |
14 pub struct Directory { | |
15 /// Contains the old file information if it existed between changesets. | |
16 /// Happens if a file `foo` is marked as removed, removed from the | |
17 /// filesystem then a directory `foo` is created and at least one of its | |
18 /// descendents is added to Mercurial. | |
19 pub(super) was_file: Option<Box<File>>, | |
20 pub(super) children: FastHashMap<Vec<u8>, Node>, | |
21 } | |
22 | |
23 /// Represents a filesystem file (or symlink) in the dirstate tree | |
24 #[derive(Debug, Clone, PartialEq)] | |
25 pub struct File { | |
26 /// Contains the old structure if it existed between changesets. | |
27 /// Happens all descendents of `foo` marked as removed and removed from | |
28 /// the filesystem, then a file `foo` is created and added to Mercurial. | |
29 pub(super) was_directory: Option<Box<Directory>>, | |
30 pub(super) entry: DirstateEntry, | |
31 } | |
32 | |
33 #[derive(Debug, Clone, PartialEq)] | |
34 pub enum NodeKind { | |
35 Directory(Directory), | |
36 File(File), | |
37 } | |
38 | |
39 #[derive(Debug, Default, Clone, PartialEq)] | |
40 pub struct Node { | |
41 pub kind: NodeKind, | |
42 } | |
43 | |
44 impl Default for NodeKind { | |
45 fn default() -> Self { | |
46 NodeKind::Directory(Default::default()) | |
47 } | |
48 } | |
49 | |
50 impl Node { | |
51 pub fn insert(&mut self, path: &[u8], new_entry: DirstateEntry) -> InsertResult { | |
52 let mut split = path.splitn(2, |&c| c == b'/'); | |
53 let head = split.next().unwrap_or(b""); | |
54 let tail = split.next().unwrap_or(b""); | |
55 | |
56 if let NodeKind::File(file) = &mut self.kind { | |
57 if tail.is_empty() && head.is_empty() { | |
58 // We're modifying the current file | |
59 let new = Self { | |
60 kind: NodeKind::File(File { | |
61 entry: new_entry, | |
62 ..file.clone() | |
63 }), | |
64 }; | |
65 return InsertResult { | |
66 did_insert: false, | |
67 old_entry: Some(std::mem::replace(self, new)), | |
68 }; | |
69 } else { | |
70 match file.entry.state { | |
71 // Only replace the current file with a directory if it's | |
72 // marked as `Removed` | |
73 EntryState::Removed => { | |
74 self.kind = NodeKind::Directory(Directory { | |
75 was_file: Some(Box::from(file.clone())), | |
76 children: Default::default(), | |
77 }) | |
78 } | |
79 _ => return Node::insert_in_file(file, new_entry, head, tail), | |
80 } | |
81 } | |
82 } | |
83 | |
84 match &mut self.kind { | |
85 NodeKind::Directory(directory) => { | |
86 return Node::insert_in_directory(directory, new_entry, head, tail); | |
87 } | |
88 NodeKind::File(_) => unreachable!("The file case has already been handled"), | |
89 } | |
90 } | |
91 | |
92 /// The current file still exists and is not marked as `Removed`. | |
93 /// Insert the entry in its `was_directory`. | |
94 fn insert_in_file( | |
95 file: &mut File, | |
96 new_entry: DirstateEntry, | |
97 head: &[u8], | |
98 tail: &[u8], | |
99 ) -> InsertResult { | |
100 if let Some(d) = &mut file.was_directory { | |
101 Node::insert_in_directory(d, new_entry, head, tail) | |
102 } else { | |
103 let mut dir = Directory { | |
104 was_file: None, | |
105 children: FastHashMap::default(), | |
106 }; | |
107 let res = Node::insert_in_directory(&mut dir, new_entry, head, tail); | |
108 file.was_directory = Some(Box::new(dir)); | |
109 res | |
110 } | |
111 } | |
112 | |
113 /// Insert an entry in the subtree of `directory` | |
114 fn insert_in_directory( | |
115 directory: &mut Directory, | |
116 new_entry: DirstateEntry, | |
117 head: &[u8], | |
118 tail: &[u8], | |
119 ) -> InsertResult { | |
120 let mut res = InsertResult::default(); | |
121 | |
122 if let Some(node) = directory.children.get_mut(head) { | |
123 // Node exists | |
124 match &mut node.kind { | |
125 NodeKind::Directory(subdir) => { | |
126 if tail.is_empty() { | |
127 let becomes_file = Self { | |
128 kind: NodeKind::File(File { | |
129 was_directory: Some(Box::from(subdir.clone())), | |
130 entry: new_entry, | |
131 }), | |
132 }; | |
133 let old_entry = directory.children.insert(head.to_owned(), becomes_file); | |
134 return InsertResult { | |
135 did_insert: true, | |
136 old_entry, | |
137 }; | |
138 } else { | |
139 res = node.insert(tail, new_entry); | |
140 } | |
141 } | |
142 NodeKind::File(_) => { | |
143 res = node.insert(tail, new_entry); | |
144 } | |
145 } | |
146 } else if tail.is_empty() { | |
147 // File does not already exist | |
148 directory.children.insert( | |
149 head.to_owned(), | |
150 Self { | |
151 kind: NodeKind::File(File { | |
152 was_directory: None, | |
153 entry: new_entry, | |
154 }), | |
155 }, | |
156 ); | |
157 res.did_insert = true; | |
158 } else { | |
159 // Directory does not already exist | |
160 let mut nested = Self { | |
161 kind: NodeKind::Directory(Directory { | |
162 was_file: None, | |
163 children: Default::default(), | |
164 }), | |
165 }; | |
166 res = nested.insert(tail, new_entry); | |
167 directory.children.insert(head.to_owned(), nested); | |
168 } | |
169 res | |
170 } | |
171 | |
172 /// Removes an entry from the tree, returns a `RemoveResult`. | |
173 pub fn remove(&mut self, path: &[u8]) -> RemoveResult { | |
174 let empty_result = RemoveResult::default(); | |
175 if path.is_empty() { | |
176 return empty_result; | |
177 } | |
178 let mut split = path.splitn(2, |&c| c == b'/'); | |
179 let head = split.next(); | |
180 let tail = split.next().unwrap_or(b""); | |
181 | |
182 let head = match head { | |
183 None => { | |
184 return empty_result; | |
185 } | |
186 Some(h) => h, | |
187 }; | |
188 if head == path { | |
189 match &mut self.kind { | |
190 NodeKind::Directory(d) => { | |
191 return Node::remove_from_directory(head, d); | |
192 } | |
193 NodeKind::File(f) => { | |
194 if let Some(d) = &mut f.was_directory { | |
195 let RemoveResult { old_entry, .. } = Node::remove_from_directory(head, d); | |
196 return RemoveResult { | |
197 cleanup: false, | |
198 old_entry, | |
199 }; | |
200 } | |
201 } | |
202 } | |
203 empty_result | |
204 } else { | |
205 // Look into the dirs | |
206 match &mut self.kind { | |
207 NodeKind::Directory(d) => { | |
208 if let Some(child) = d.children.get_mut(head) { | |
209 let mut res = child.remove(tail); | |
210 if res.cleanup { | |
211 d.children.remove(head); | |
212 } | |
213 res.cleanup = d.children.len() == 0 && d.was_file.is_none(); | |
214 res | |
215 } else { | |
216 empty_result | |
217 } | |
218 } | |
219 NodeKind::File(f) => { | |
220 if let Some(d) = &mut f.was_directory { | |
221 if let Some(child) = d.children.get_mut(head) { | |
222 let RemoveResult { cleanup, old_entry } = child.remove(tail); | |
223 if cleanup { | |
224 d.children.remove(head); | |
225 } | |
226 if d.children.len() == 0 && d.was_file.is_none() { | |
227 f.was_directory = None; | |
228 } | |
229 | |
230 return RemoveResult { | |
231 cleanup: false, | |
232 old_entry, | |
233 }; | |
234 } | |
235 } | |
236 empty_result | |
237 } | |
238 } | |
239 } | |
240 } | |
241 | |
242 fn remove_from_directory(head: &[u8], d: &mut Directory) -> RemoveResult { | |
243 if let Some(node) = d.children.get_mut(head) { | |
244 return match &mut node.kind { | |
245 NodeKind::Directory(d) => { | |
246 if let Some(f) = &mut d.was_file { | |
247 let entry = f.entry; | |
248 d.was_file = None; | |
249 RemoveResult { | |
250 cleanup: false, | |
251 old_entry: Some(entry), | |
252 } | |
253 } else { | |
254 RemoveResult::default() | |
255 } | |
256 } | |
257 NodeKind::File(f) => { | |
258 let entry = f.entry; | |
259 let mut cleanup = false; | |
260 match &f.was_directory { | |
261 None => { | |
262 if d.children.len() == 1 { | |
263 cleanup = true; | |
264 } | |
265 d.children.remove(head); | |
266 } | |
267 Some(dir) => { | |
268 node.kind = NodeKind::Directory(*dir.clone()); | |
269 } | |
270 } | |
271 | |
272 RemoveResult { | |
273 cleanup: cleanup, | |
274 old_entry: Some(entry), | |
275 } | |
276 } | |
277 }; | |
278 } | |
279 RemoveResult::default() | |
280 } | |
281 | |
282 pub fn get(&self, path: &[u8]) -> Option<&Node> { | |
283 if path.is_empty() { | |
284 return Some(&self); | |
285 } | |
286 let mut split = path.splitn(2, |&c| c == b'/'); | |
287 let head = split.next(); | |
288 let tail = split.next().unwrap_or(b""); | |
289 | |
290 let head = match head { | |
291 None => { | |
292 return Some(&self); | |
293 } | |
294 Some(h) => h, | |
295 }; | |
296 match &self.kind { | |
297 NodeKind::Directory(d) => { | |
298 if let Some(child) = d.children.get(head) { | |
299 return child.get(tail); | |
300 } | |
301 } | |
302 NodeKind::File(f) => { | |
303 if let Some(d) = &f.was_directory { | |
304 if let Some(child) = d.children.get(head) { | |
305 return child.get(tail); | |
306 } | |
307 } | |
308 } | |
309 } | |
310 | |
311 None | |
312 } | |
313 | |
314 pub fn get_mut(&mut self, path: &[u8]) -> Option<&mut NodeKind> { | |
315 if path.is_empty() { | |
316 return Some(&mut self.kind); | |
317 } | |
318 let mut split = path.splitn(2, |&c| c == b'/'); | |
319 let head = split.next(); | |
320 let tail = split.next().unwrap_or(b""); | |
321 | |
322 let head = match head { | |
323 None => { | |
324 return Some(&mut self.kind); | |
325 } | |
326 Some(h) => h, | |
327 }; | |
328 match &mut self.kind { | |
329 NodeKind::Directory(d) => { | |
330 if let Some(child) = d.children.get_mut(head) { | |
331 return child.get_mut(tail); | |
332 } | |
333 } | |
334 NodeKind::File(f) => { | |
335 if let Some(d) = &mut f.was_directory { | |
336 if let Some(child) = d.children.get_mut(head) { | |
337 return child.get_mut(tail); | |
338 } | |
339 } | |
340 } | |
341 } | |
342 | |
343 None | |
344 } | |
345 | |
346 pub fn iter(&self) -> Iter { | |
347 Iter::new(self) | |
348 } | |
349 } | |
350 | |
351 /// Information returned to the caller of an `insert` operation for integrity. | |
352 #[derive(Debug, Default)] | |
353 pub struct InsertResult { | |
354 /// Whether the insertion resulted in an actual insertion and not an | |
355 /// update | |
356 pub(super) did_insert: bool, | |
357 /// The entry that was replaced, if it exists | |
358 pub(super) old_entry: Option<Node>, | |
359 } | |
360 | |
361 /// Information returned to the caller of a `remove` operation integrity. | |
362 #[derive(Debug, Default)] | |
363 pub struct RemoveResult { | |
364 /// If the caller needs to remove the current node | |
365 pub(super) cleanup: bool, | |
366 /// The entry that was replaced, if it exists | |
367 pub(super) old_entry: Option<DirstateEntry>, | |
368 } | |
369 | |
370 impl<'a> IntoIterator for &'a Node { | |
371 type Item = (HgPathBuf, DirstateEntry); | |
372 type IntoIter = Iter<'a>; | |
373 | |
374 fn into_iter(self) -> Self::IntoIter { | |
375 self.iter() | |
376 } | |
377 } |