Mercurial > hg
comparison rust/hg-core/src/dirstate/dirstate_map.rs @ 42753:fce6dc93a510
rust-dirstate: rust implementation of dirstatemap
The `dirstatemap` is one of the last building blocks needed to get to a
`dirstate.walk` Rust implementation.
Disclaimer: This change is part of a big (10) series of patches, all of which
started as one big changeset that took a long time to write.
This `dirstatemap` implementation is a compromise in terms of complexity both
for me and for the reviewers. I chose to submit this patch right now because
while it is not perfect, it works and is simple enough (IMHO) to be reviewed.
The Python implementation uses a lot of lazy propertycaches, breaks
encapsulation and is used as an iterator in a lot of places, all of which
dictated the somewhat unidiomatic patterns in this change.
Like written in the comments, rewriting this struct to use the typestate
pattern might be a good idea, but this is a good first step.
Differential Revision: https://phab.mercurial-scm.org/D6632
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 10 Jul 2019 09:56:23 +0200 |
parents | |
children | 8d2d5dfa07f5 |
comparison
equal
deleted
inserted
replaced
42752:30320c7bf79f | 42753:fce6dc93a510 |
---|---|
1 // dirstate_map.rs | |
2 // | |
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
4 // | |
5 // This software may be used and distributed according to the terms of the | |
6 // GNU General Public License version 2 or any later version. | |
7 | |
8 use crate::{ | |
9 dirstate::{parsers::PARENT_SIZE, EntryState}, | |
10 pack_dirstate, parse_dirstate, | |
11 utils::copy_into_array, | |
12 CopyMap, DirsIterable, DirsMultiset, DirstateEntry, DirstateError, | |
13 DirstateMapError, DirstateParents, DirstateParseError, StateMap, | |
14 }; | |
15 use core::borrow::Borrow; | |
16 use std::collections::{HashMap, HashSet}; | |
17 use std::iter::FromIterator; | |
18 use std::ops::Deref; | |
19 use std::time::Duration; | |
20 | |
21 pub type FileFoldMap = HashMap<Vec<u8>, Vec<u8>>; | |
22 | |
23 const NULL_REVISION: [u8; 20] = [ | |
24 b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', | |
25 b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', b'\0', | |
26 ]; | |
27 const MTIME_UNSET: i32 = -1; | |
28 const SIZE_DIRTY: i32 = -2; | |
29 | |
30 #[derive(Default)] | |
31 pub struct DirstateMap { | |
32 state_map: StateMap, | |
33 pub copy_map: CopyMap, | |
34 file_fold_map: Option<FileFoldMap>, | |
35 pub dirs: Option<DirsMultiset>, | |
36 pub all_dirs: Option<DirsMultiset>, | |
37 non_normal_set: HashSet<Vec<u8>>, | |
38 other_parent_set: HashSet<Vec<u8>>, | |
39 parents: Option<DirstateParents>, | |
40 dirty_parents: bool, | |
41 } | |
42 | |
43 /// Should only really be used in python interface code, for clarity | |
44 impl Deref for DirstateMap { | |
45 type Target = StateMap; | |
46 | |
47 fn deref(&self) -> &Self::Target { | |
48 &self.state_map | |
49 } | |
50 } | |
51 | |
52 impl FromIterator<(Vec<u8>, DirstateEntry)> for DirstateMap { | |
53 fn from_iter<I: IntoIterator<Item = (Vec<u8>, DirstateEntry)>>( | |
54 iter: I, | |
55 ) -> Self { | |
56 Self { | |
57 state_map: iter.into_iter().collect(), | |
58 ..Self::default() | |
59 } | |
60 } | |
61 } | |
62 | |
63 impl DirstateMap { | |
64 pub fn new() -> Self { | |
65 Self::default() | |
66 } | |
67 | |
68 pub fn clear(&mut self) { | |
69 self.state_map.clear(); | |
70 self.copy_map.clear(); | |
71 self.file_fold_map = None; | |
72 self.non_normal_set.clear(); | |
73 self.other_parent_set.clear(); | |
74 self.set_parents(DirstateParents { | |
75 p1: NULL_REVISION, | |
76 p2: NULL_REVISION, | |
77 }) | |
78 } | |
79 | |
80 /// Add a tracked file to the dirstate | |
81 pub fn add_file( | |
82 &mut self, | |
83 filename: &[u8], | |
84 old_state: EntryState, | |
85 entry: DirstateEntry, | |
86 ) { | |
87 if old_state == EntryState::Unknown || old_state == EntryState::Removed | |
88 { | |
89 if let Some(ref mut dirs) = self.dirs { | |
90 dirs.add_path(filename) | |
91 } | |
92 } | |
93 if old_state == EntryState::Unknown { | |
94 if let Some(ref mut all_dirs) = self.all_dirs { | |
95 all_dirs.add_path(filename) | |
96 } | |
97 } | |
98 self.state_map.insert(filename.to_owned(), entry.to_owned()); | |
99 | |
100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET { | |
101 self.non_normal_set.insert(filename.to_owned()); | |
102 } | |
103 | |
104 if entry.size == SIZE_DIRTY { | |
105 self.other_parent_set.insert(filename.to_owned()); | |
106 } | |
107 } | |
108 | |
109 /// Mark a file as removed in the dirstate. | |
110 /// | |
111 /// The `size` parameter is used to store sentinel values that indicate | |
112 /// the file's previous state. In the future, we should refactor this | |
113 /// to be more explicit about what that state is. | |
114 pub fn remove_file( | |
115 &mut self, | |
116 filename: &[u8], | |
117 old_state: EntryState, | |
118 size: i32, | |
119 ) -> Result<(), DirstateMapError> { | |
120 if old_state != EntryState::Unknown && old_state != EntryState::Removed | |
121 { | |
122 if let Some(ref mut dirs) = self.dirs { | |
123 dirs.delete_path(filename)?; | |
124 } | |
125 } | |
126 if old_state == EntryState::Unknown { | |
127 if let Some(ref mut all_dirs) = self.all_dirs { | |
128 all_dirs.add_path(filename); | |
129 } | |
130 } | |
131 | |
132 if let Some(ref mut file_fold_map) = self.file_fold_map { | |
133 file_fold_map | |
134 .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref()); | |
135 } | |
136 self.state_map.insert( | |
137 filename.to_owned(), | |
138 DirstateEntry { | |
139 state: EntryState::Removed, | |
140 mode: 0, | |
141 size, | |
142 mtime: 0, | |
143 }, | |
144 ); | |
145 self.non_normal_set.insert(filename.to_owned()); | |
146 Ok(()) | |
147 } | |
148 | |
149 /// Remove a file from the dirstate. | |
150 /// Returns `true` if the file was previously recorded. | |
151 pub fn drop_file( | |
152 &mut self, | |
153 filename: &[u8], | |
154 old_state: EntryState, | |
155 ) -> Result<bool, DirstateMapError> { | |
156 let exists = self | |
157 .state_map | |
158 .remove::<Vec<u8>>(filename.to_owned().as_ref()) | |
159 .is_some(); | |
160 | |
161 if exists { | |
162 if old_state != EntryState::Removed { | |
163 if let Some(ref mut dirs) = self.dirs { | |
164 dirs.delete_path(filename)?; | |
165 } | |
166 } | |
167 if let Some(ref mut all_dirs) = self.all_dirs { | |
168 all_dirs.delete_path(filename)?; | |
169 } | |
170 } | |
171 if let Some(ref mut file_fold_map) = self.file_fold_map { | |
172 file_fold_map | |
173 .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref()); | |
174 } | |
175 self.non_normal_set | |
176 .remove::<Vec<u8>>(filename.to_owned().as_ref()); | |
177 | |
178 Ok(exists) | |
179 } | |
180 | |
181 pub fn clear_ambiguous_times( | |
182 &mut self, | |
183 filenames: Vec<Vec<u8>>, | |
184 now: i32, | |
185 ) { | |
186 for filename in filenames { | |
187 let mut changed = false; | |
188 self.state_map | |
189 .entry(filename.to_owned()) | |
190 .and_modify(|entry| { | |
191 if entry.state == EntryState::Normal && entry.mtime == now | |
192 { | |
193 changed = true; | |
194 *entry = DirstateEntry { | |
195 mtime: MTIME_UNSET, | |
196 ..*entry | |
197 }; | |
198 } | |
199 }); | |
200 if changed { | |
201 self.non_normal_set.insert(filename.to_owned()); | |
202 } | |
203 } | |
204 } | |
205 | |
206 pub fn non_normal_other_parent_entries( | |
207 &self, | |
208 ) -> (HashSet<Vec<u8>>, HashSet<Vec<u8>>) { | |
209 let mut non_normal = HashSet::new(); | |
210 let mut other_parent = HashSet::new(); | |
211 | |
212 for ( | |
213 filename, | |
214 DirstateEntry { | |
215 state, size, mtime, .. | |
216 }, | |
217 ) in self.state_map.iter() | |
218 { | |
219 if *state != EntryState::Normal || *mtime == MTIME_UNSET { | |
220 non_normal.insert(filename.to_owned()); | |
221 } | |
222 if *state == EntryState::Normal && *size == SIZE_DIRTY { | |
223 other_parent.insert(filename.to_owned()); | |
224 } | |
225 } | |
226 | |
227 (non_normal, other_parent) | |
228 } | |
229 | |
230 /// Both of these setters and their uses appear to be the simplest way to | |
231 /// emulate a Python lazy property, but it is ugly and unidiomatic. | |
232 /// TODO One day, rewriting this struct using the typestate might be a | |
233 /// good idea. | |
234 pub fn set_all_dirs(&mut self) { | |
235 if self.all_dirs.is_none() { | |
236 self.all_dirs = Some(DirsMultiset::new( | |
237 DirsIterable::Dirstate(&self.state_map), | |
238 None, | |
239 )); | |
240 } | |
241 } | |
242 | |
243 pub fn set_dirs(&mut self) { | |
244 if self.dirs.is_none() { | |
245 self.dirs = Some(DirsMultiset::new( | |
246 DirsIterable::Dirstate(&self.state_map), | |
247 Some(EntryState::Removed), | |
248 )); | |
249 } | |
250 } | |
251 | |
252 pub fn has_tracked_dir(&mut self, directory: &[u8]) -> bool { | |
253 self.set_dirs(); | |
254 self.dirs.as_ref().unwrap().contains(directory.as_ref()) | |
255 } | |
256 | |
257 pub fn has_dir(&mut self, directory: &[u8]) -> bool { | |
258 self.set_all_dirs(); | |
259 self.all_dirs.as_ref().unwrap().contains(directory.as_ref()) | |
260 } | |
261 | |
262 pub fn parents( | |
263 &mut self, | |
264 file_contents: &[u8], | |
265 ) -> Result<DirstateParents, DirstateError> { | |
266 if let Some(ref parents) = self.parents { | |
267 return Ok(parents.clone()); | |
268 } | |
269 let parents; | |
270 if file_contents.len() == 40 { | |
271 parents = DirstateParents { | |
272 p1: copy_into_array(&file_contents[..PARENT_SIZE]), | |
273 p2: copy_into_array( | |
274 &file_contents[PARENT_SIZE..PARENT_SIZE * 2], | |
275 ), | |
276 }; | |
277 } else if file_contents.is_empty() { | |
278 parents = DirstateParents { | |
279 p1: NULL_REVISION, | |
280 p2: NULL_REVISION, | |
281 }; | |
282 } else { | |
283 return Err(DirstateError::Parse(DirstateParseError::Damaged)); | |
284 } | |
285 | |
286 self.parents = Some(parents.to_owned()); | |
287 Ok(parents.clone()) | |
288 } | |
289 | |
290 pub fn set_parents(&mut self, parents: DirstateParents) { | |
291 self.parents = Some(parents.clone()); | |
292 self.dirty_parents = true; | |
293 } | |
294 | |
295 pub fn read( | |
296 &mut self, | |
297 file_contents: &[u8], | |
298 ) -> Result<Option<DirstateParents>, DirstateError> { | |
299 if file_contents.is_empty() { | |
300 return Ok(None); | |
301 } | |
302 | |
303 let parents = parse_dirstate( | |
304 &mut self.state_map, | |
305 &mut self.copy_map, | |
306 file_contents, | |
307 )?; | |
308 | |
309 if !self.dirty_parents { | |
310 self.set_parents(parents.to_owned()); | |
311 } | |
312 | |
313 Ok(Some(parents)) | |
314 } | |
315 | |
316 pub fn pack( | |
317 &mut self, | |
318 parents: DirstateParents, | |
319 now: Duration, | |
320 ) -> Result<Vec<u8>, DirstateError> { | |
321 let packed = | |
322 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?; | |
323 | |
324 self.dirty_parents = false; | |
325 | |
326 let result = self.non_normal_other_parent_entries(); | |
327 self.non_normal_set = result.0; | |
328 self.other_parent_set = result.1; | |
329 Ok(packed) | |
330 } | |
331 | |
332 pub fn build_file_fold_map(&mut self) -> FileFoldMap { | |
333 if let Some(ref file_fold_map) = self.file_fold_map { | |
334 return file_fold_map.to_owned(); | |
335 } | |
336 let mut new_file_fold_map = FileFoldMap::new(); | |
337 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow() | |
338 { | |
339 if *state == EntryState::Removed { | |
340 new_file_fold_map.insert( | |
341 filename.to_ascii_uppercase().to_owned(), | |
342 filename.to_owned(), | |
343 ); | |
344 } | |
345 } | |
346 self.file_fold_map = Some(new_file_fold_map); | |
347 self.file_fold_map.to_owned().unwrap() | |
348 } | |
349 } | |
350 | |
351 #[cfg(test)] | |
352 mod tests { | |
353 use super::*; | |
354 | |
355 #[test] | |
356 fn test_dirs_multiset() { | |
357 let mut map = DirstateMap::new(); | |
358 assert!(map.dirs.is_none()); | |
359 assert!(map.all_dirs.is_none()); | |
360 | |
361 assert_eq!(false, map.has_dir(b"nope")); | |
362 assert!(map.all_dirs.is_some()); | |
363 assert!(map.dirs.is_none()); | |
364 | |
365 assert_eq!(false, map.has_tracked_dir(b"nope")); | |
366 assert!(map.dirs.is_some()); | |
367 } | |
368 | |
369 #[test] | |
370 fn test_add_file() { | |
371 let mut map = DirstateMap::new(); | |
372 | |
373 assert_eq!(0, map.len()); | |
374 | |
375 map.add_file( | |
376 b"meh", | |
377 EntryState::Normal, | |
378 DirstateEntry { | |
379 state: EntryState::Normal, | |
380 mode: 1337, | |
381 mtime: 1337, | |
382 size: 1337, | |
383 }, | |
384 ); | |
385 | |
386 assert_eq!(1, map.len()); | |
387 assert_eq!(0, map.non_normal_set.len()); | |
388 assert_eq!(0, map.other_parent_set.len()); | |
389 } | |
390 | |
391 #[test] | |
392 fn test_non_normal_other_parent_entries() { | |
393 let map: DirstateMap = [ | |
394 (b"f1", (EntryState::Removed, 1337, 1337, 1337)), | |
395 (b"f2", (EntryState::Normal, 1337, 1337, -1)), | |
396 (b"f3", (EntryState::Normal, 1337, 1337, 1337)), | |
397 (b"f4", (EntryState::Normal, 1337, -2, 1337)), | |
398 (b"f5", (EntryState::Added, 1337, 1337, 1337)), | |
399 (b"f6", (EntryState::Added, 1337, 1337, -1)), | |
400 (b"f7", (EntryState::Merged, 1337, 1337, -1)), | |
401 (b"f8", (EntryState::Merged, 1337, 1337, 1337)), | |
402 (b"f9", (EntryState::Merged, 1337, -2, 1337)), | |
403 (b"fa", (EntryState::Added, 1337, -2, 1337)), | |
404 (b"fb", (EntryState::Removed, 1337, -2, 1337)), | |
405 ] | |
406 .iter() | |
407 .map(|(fname, (state, mode, size, mtime))| { | |
408 ( | |
409 fname.to_vec(), | |
410 DirstateEntry { | |
411 state: *state, | |
412 mode: *mode, | |
413 size: *size, | |
414 mtime: *mtime, | |
415 }, | |
416 ) | |
417 }) | |
418 .collect(); | |
419 | |
420 let non_normal = [ | |
421 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb", | |
422 ] | |
423 .iter() | |
424 .map(|x| x.to_vec()) | |
425 .collect(); | |
426 | |
427 let mut other_parent = HashSet::new(); | |
428 other_parent.insert(b"f4".to_vec()); | |
429 | |
430 assert_eq!( | |
431 (non_normal, other_parent), | |
432 map.non_normal_other_parent_entries() | |
433 ); | |
434 } | |
435 } |