comparison rust/hg-core/src/dirstate/dirstate_tree/node.rs @ 45562:b51167d70f5a

rust: add `dirstate_tree` module Mercurial needs to represent the filesystem hierarchy on which it operates, for example in the dirstate. Its current on-disk representation is an unsorted, flat structure that gets transformed in the current Rust code into a `HashMap`. This loses the hierarchical information of the dirstate, leading to some unfortunate performance and algorithmic compromises. This module adds an implementation of a radix tree that is specialized for representing the dirstate: its unit is the path component. I have made no efforts to optimize either its memory footprint or its insertion speed: they're pretty bad for now. Following will be a few patches that modify the dirstate.status logic to use that new hierarchical information, fixing issue 6335 in the same swing. Differential Revision: https://phab.mercurial-scm.org/D9085
author Raphaël Gomès <rgomes@octobus.net>
date Fri, 25 Sep 2020 17:51:34 +0200
parents
children c35db907363d
comparison
equal deleted inserted replaced
45558:80bf7b1ada15 45562:b51167d70f5a
1 // node.rs
2 //
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use super::iter::Iter;
9 use crate::utils::hg_path::HgPathBuf;
10 use crate::{DirstateEntry, EntryState, FastHashMap};
11
12 /// Represents a filesystem directory in the dirstate tree
13 #[derive(Debug, Default, Clone, PartialEq)]
14 pub struct Directory {
15 /// Contains the old file information if it existed between changesets.
16 /// Happens if a file `foo` is marked as removed, removed from the
17 /// filesystem then a directory `foo` is created and at least one of its
18 /// descendents is added to Mercurial.
19 pub(super) was_file: Option<Box<File>>,
20 pub(super) children: FastHashMap<Vec<u8>, Node>,
21 }
22
23 /// Represents a filesystem file (or symlink) in the dirstate tree
24 #[derive(Debug, Clone, PartialEq)]
25 pub struct File {
26 /// Contains the old structure if it existed between changesets.
27 /// Happens all descendents of `foo` marked as removed and removed from
28 /// the filesystem, then a file `foo` is created and added to Mercurial.
29 pub(super) was_directory: Option<Box<Directory>>,
30 pub(super) entry: DirstateEntry,
31 }
32
33 #[derive(Debug, Clone, PartialEq)]
34 pub enum NodeKind {
35 Directory(Directory),
36 File(File),
37 }
38
39 #[derive(Debug, Default, Clone, PartialEq)]
40 pub struct Node {
41 pub kind: NodeKind,
42 }
43
44 impl Default for NodeKind {
45 fn default() -> Self {
46 NodeKind::Directory(Default::default())
47 }
48 }
49
50 impl Node {
51 pub fn insert(&mut self, path: &[u8], new_entry: DirstateEntry) -> InsertResult {
52 let mut split = path.splitn(2, |&c| c == b'/');
53 let head = split.next().unwrap_or(b"");
54 let tail = split.next().unwrap_or(b"");
55
56 if let NodeKind::File(file) = &mut self.kind {
57 if tail.is_empty() && head.is_empty() {
58 // We're modifying the current file
59 let new = Self {
60 kind: NodeKind::File(File {
61 entry: new_entry,
62 ..file.clone()
63 }),
64 };
65 return InsertResult {
66 did_insert: false,
67 old_entry: Some(std::mem::replace(self, new)),
68 };
69 } else {
70 match file.entry.state {
71 // Only replace the current file with a directory if it's
72 // marked as `Removed`
73 EntryState::Removed => {
74 self.kind = NodeKind::Directory(Directory {
75 was_file: Some(Box::from(file.clone())),
76 children: Default::default(),
77 })
78 }
79 _ => return Node::insert_in_file(file, new_entry, head, tail),
80 }
81 }
82 }
83
84 match &mut self.kind {
85 NodeKind::Directory(directory) => {
86 return Node::insert_in_directory(directory, new_entry, head, tail);
87 }
88 NodeKind::File(_) => unreachable!("The file case has already been handled"),
89 }
90 }
91
92 /// The current file still exists and is not marked as `Removed`.
93 /// Insert the entry in its `was_directory`.
94 fn insert_in_file(
95 file: &mut File,
96 new_entry: DirstateEntry,
97 head: &[u8],
98 tail: &[u8],
99 ) -> InsertResult {
100 if let Some(d) = &mut file.was_directory {
101 Node::insert_in_directory(d, new_entry, head, tail)
102 } else {
103 let mut dir = Directory {
104 was_file: None,
105 children: FastHashMap::default(),
106 };
107 let res = Node::insert_in_directory(&mut dir, new_entry, head, tail);
108 file.was_directory = Some(Box::new(dir));
109 res
110 }
111 }
112
113 /// Insert an entry in the subtree of `directory`
114 fn insert_in_directory(
115 directory: &mut Directory,
116 new_entry: DirstateEntry,
117 head: &[u8],
118 tail: &[u8],
119 ) -> InsertResult {
120 let mut res = InsertResult::default();
121
122 if let Some(node) = directory.children.get_mut(head) {
123 // Node exists
124 match &mut node.kind {
125 NodeKind::Directory(subdir) => {
126 if tail.is_empty() {
127 let becomes_file = Self {
128 kind: NodeKind::File(File {
129 was_directory: Some(Box::from(subdir.clone())),
130 entry: new_entry,
131 }),
132 };
133 let old_entry = directory.children.insert(head.to_owned(), becomes_file);
134 return InsertResult {
135 did_insert: true,
136 old_entry,
137 };
138 } else {
139 res = node.insert(tail, new_entry);
140 }
141 }
142 NodeKind::File(_) => {
143 res = node.insert(tail, new_entry);
144 }
145 }
146 } else if tail.is_empty() {
147 // File does not already exist
148 directory.children.insert(
149 head.to_owned(),
150 Self {
151 kind: NodeKind::File(File {
152 was_directory: None,
153 entry: new_entry,
154 }),
155 },
156 );
157 res.did_insert = true;
158 } else {
159 // Directory does not already exist
160 let mut nested = Self {
161 kind: NodeKind::Directory(Directory {
162 was_file: None,
163 children: Default::default(),
164 }),
165 };
166 res = nested.insert(tail, new_entry);
167 directory.children.insert(head.to_owned(), nested);
168 }
169 res
170 }
171
172 /// Removes an entry from the tree, returns a `RemoveResult`.
173 pub fn remove(&mut self, path: &[u8]) -> RemoveResult {
174 let empty_result = RemoveResult::default();
175 if path.is_empty() {
176 return empty_result;
177 }
178 let mut split = path.splitn(2, |&c| c == b'/');
179 let head = split.next();
180 let tail = split.next().unwrap_or(b"");
181
182 let head = match head {
183 None => {
184 return empty_result;
185 }
186 Some(h) => h,
187 };
188 if head == path {
189 match &mut self.kind {
190 NodeKind::Directory(d) => {
191 return Node::remove_from_directory(head, d);
192 }
193 NodeKind::File(f) => {
194 if let Some(d) = &mut f.was_directory {
195 let RemoveResult { old_entry, .. } = Node::remove_from_directory(head, d);
196 return RemoveResult {
197 cleanup: false,
198 old_entry,
199 };
200 }
201 }
202 }
203 empty_result
204 } else {
205 // Look into the dirs
206 match &mut self.kind {
207 NodeKind::Directory(d) => {
208 if let Some(child) = d.children.get_mut(head) {
209 let mut res = child.remove(tail);
210 if res.cleanup {
211 d.children.remove(head);
212 }
213 res.cleanup = d.children.len() == 0 && d.was_file.is_none();
214 res
215 } else {
216 empty_result
217 }
218 }
219 NodeKind::File(f) => {
220 if let Some(d) = &mut f.was_directory {
221 if let Some(child) = d.children.get_mut(head) {
222 let RemoveResult { cleanup, old_entry } = child.remove(tail);
223 if cleanup {
224 d.children.remove(head);
225 }
226 if d.children.len() == 0 && d.was_file.is_none() {
227 f.was_directory = None;
228 }
229
230 return RemoveResult {
231 cleanup: false,
232 old_entry,
233 };
234 }
235 }
236 empty_result
237 }
238 }
239 }
240 }
241
242 fn remove_from_directory(head: &[u8], d: &mut Directory) -> RemoveResult {
243 if let Some(node) = d.children.get_mut(head) {
244 return match &mut node.kind {
245 NodeKind::Directory(d) => {
246 if let Some(f) = &mut d.was_file {
247 let entry = f.entry;
248 d.was_file = None;
249 RemoveResult {
250 cleanup: false,
251 old_entry: Some(entry),
252 }
253 } else {
254 RemoveResult::default()
255 }
256 }
257 NodeKind::File(f) => {
258 let entry = f.entry;
259 let mut cleanup = false;
260 match &f.was_directory {
261 None => {
262 if d.children.len() == 1 {
263 cleanup = true;
264 }
265 d.children.remove(head);
266 }
267 Some(dir) => {
268 node.kind = NodeKind::Directory(*dir.clone());
269 }
270 }
271
272 RemoveResult {
273 cleanup: cleanup,
274 old_entry: Some(entry),
275 }
276 }
277 };
278 }
279 RemoveResult::default()
280 }
281
282 pub fn get(&self, path: &[u8]) -> Option<&Node> {
283 if path.is_empty() {
284 return Some(&self);
285 }
286 let mut split = path.splitn(2, |&c| c == b'/');
287 let head = split.next();
288 let tail = split.next().unwrap_or(b"");
289
290 let head = match head {
291 None => {
292 return Some(&self);
293 }
294 Some(h) => h,
295 };
296 match &self.kind {
297 NodeKind::Directory(d) => {
298 if let Some(child) = d.children.get(head) {
299 return child.get(tail);
300 }
301 }
302 NodeKind::File(f) => {
303 if let Some(d) = &f.was_directory {
304 if let Some(child) = d.children.get(head) {
305 return child.get(tail);
306 }
307 }
308 }
309 }
310
311 None
312 }
313
314 pub fn get_mut(&mut self, path: &[u8]) -> Option<&mut NodeKind> {
315 if path.is_empty() {
316 return Some(&mut self.kind);
317 }
318 let mut split = path.splitn(2, |&c| c == b'/');
319 let head = split.next();
320 let tail = split.next().unwrap_or(b"");
321
322 let head = match head {
323 None => {
324 return Some(&mut self.kind);
325 }
326 Some(h) => h,
327 };
328 match &mut self.kind {
329 NodeKind::Directory(d) => {
330 if let Some(child) = d.children.get_mut(head) {
331 return child.get_mut(tail);
332 }
333 }
334 NodeKind::File(f) => {
335 if let Some(d) = &mut f.was_directory {
336 if let Some(child) = d.children.get_mut(head) {
337 return child.get_mut(tail);
338 }
339 }
340 }
341 }
342
343 None
344 }
345
346 pub fn iter(&self) -> Iter {
347 Iter::new(self)
348 }
349 }
350
351 /// Information returned to the caller of an `insert` operation for integrity.
352 #[derive(Debug, Default)]
353 pub struct InsertResult {
354 /// Whether the insertion resulted in an actual insertion and not an
355 /// update
356 pub(super) did_insert: bool,
357 /// The entry that was replaced, if it exists
358 pub(super) old_entry: Option<Node>,
359 }
360
361 /// Information returned to the caller of a `remove` operation integrity.
362 #[derive(Debug, Default)]
363 pub struct RemoveResult {
364 /// If the caller needs to remove the current node
365 pub(super) cleanup: bool,
366 /// The entry that was replaced, if it exists
367 pub(super) old_entry: Option<DirstateEntry>,
368 }
369
370 impl<'a> IntoIterator for &'a Node {
371 type Item = (HgPathBuf, DirstateEntry);
372 type IntoIter = Iter<'a>;
373
374 fn into_iter(self) -> Self::IntoIter {
375 self.iter()
376 }
377 }