Mercurial > hg
comparison rust/hg-core/src/dirstate.rs @ 42302:d1786c1d34fa
rust-dirstate: add rust implementation of `parse_dirstate` and `pack_dirstate`
Working towards the goal of having a complete Rust implementation of
`hg status`, these two utils are a first step of many to be taken
to improve performance and code maintainability.
Two dependencies have been added: `memchr` and `byteorder`.
Both of them have been written by reputable community members and are
very mature crates.
The Rust code will often need to use their byte-oriented functions.
A few unit tests have been added and may help future development and debugging.
In a future patch that uses `parse_dirstate` to stat the working tree in
parallel - which neither the Python nor the C implementations do - actual
performance improvements will be seen for larger repositories.
Differential Revision: https://phab.mercurial-scm.org/D6348
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 06 May 2019 22:48:09 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
42301:2a7109cc5a28 | 42302:d1786c1d34fa |
---|---|
1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
2 // | |
3 // This software may be used and distributed according to the terms of the | |
4 // GNU General Public License version 2 or any later version. | |
5 | |
6 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; | |
7 use std::collections::HashMap; | |
8 use std::io::Cursor; | |
9 use {DirstatePackError, DirstateParseError}; | |
10 | |
11 #[derive(Debug, PartialEq, Copy, Clone)] | |
12 pub struct DirstateParents<'a> { | |
13 pub p1: &'a [u8], | |
14 pub p2: &'a [u8], | |
15 } | |
16 /// The C implementation uses all signed types. This will be an issue | |
17 /// either when 4GB+ source files are commonplace or in 2038, whichever | |
18 /// comes first. | |
19 #[derive(Debug, PartialEq)] | |
20 pub struct DirstateEntry { | |
21 pub state: i8, | |
22 pub mode: i32, | |
23 pub mtime: i32, | |
24 pub size: i32, | |
25 } | |
26 pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>; | |
27 | |
28 #[derive(Debug, PartialEq)] | |
29 pub struct CopyVecEntry<'a> { | |
30 pub path: &'a [u8], | |
31 pub copy_path: &'a [u8], | |
32 } | |
33 pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>; | |
34 | |
35 /// Parents are stored in the dirstate as byte hashes. | |
36 const PARENT_SIZE: usize = 20; | |
37 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. | |
38 const MIN_ENTRY_SIZE: usize = 17; | |
39 | |
40 pub fn parse_dirstate( | |
41 contents: &[u8], | |
42 ) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> { | |
43 if contents.len() < PARENT_SIZE * 2 { | |
44 return Err(DirstateParseError::TooLittleData); | |
45 } | |
46 | |
47 let mut dirstate_vec = vec![]; | |
48 let mut copies = vec![]; | |
49 let mut curr_pos = PARENT_SIZE * 2; | |
50 let parents = DirstateParents { | |
51 p1: &contents[..PARENT_SIZE], | |
52 p2: &contents[PARENT_SIZE..curr_pos], | |
53 }; | |
54 | |
55 while curr_pos < contents.len() { | |
56 if curr_pos + MIN_ENTRY_SIZE > contents.len() { | |
57 return Err(DirstateParseError::Overflow); | |
58 } | |
59 let entry_bytes = &contents[curr_pos..]; | |
60 | |
61 let mut cursor = Cursor::new(entry_bytes); | |
62 let state = cursor.read_i8()?; | |
63 let mode = cursor.read_i32::<BigEndian>()?; | |
64 let size = cursor.read_i32::<BigEndian>()?; | |
65 let mtime = cursor.read_i32::<BigEndian>()?; | |
66 let path_len = cursor.read_i32::<BigEndian>()? as usize; | |
67 | |
68 if path_len > contents.len() - curr_pos { | |
69 return Err(DirstateParseError::Overflow); | |
70 } | |
71 | |
72 // Slice instead of allocating a Vec needed for `read_exact` | |
73 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; | |
74 | |
75 let (path, copy) = match memchr::memchr(0, path) { | |
76 None => (path, None), | |
77 Some(i) => (&path[..i], Some(&path[(i + 1)..])), | |
78 }; | |
79 | |
80 if let Some(copy_path) = copy { | |
81 copies.push(CopyVecEntry { path, copy_path }); | |
82 }; | |
83 dirstate_vec.push(( | |
84 path.to_owned(), | |
85 DirstateEntry { | |
86 state, | |
87 mode, | |
88 size, | |
89 mtime, | |
90 }, | |
91 )); | |
92 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); | |
93 } | |
94 | |
95 Ok((parents, dirstate_vec, copies)) | |
96 } | |
97 | |
98 pub fn pack_dirstate( | |
99 dirstate_vec: &DirstateVec, | |
100 copymap: &HashMap<Vec<u8>, Vec<u8>>, | |
101 parents: DirstateParents, | |
102 now: i32, | |
103 ) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> { | |
104 if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE { | |
105 return Err(DirstatePackError::CorruptedParent); | |
106 } | |
107 | |
108 let expected_size: usize = dirstate_vec | |
109 .iter() | |
110 .map(|(ref filename, _)| { | |
111 let mut length = MIN_ENTRY_SIZE + filename.len(); | |
112 if let Some(ref copy) = copymap.get(filename) { | |
113 length += copy.len() + 1; | |
114 } | |
115 length | |
116 }) | |
117 .sum(); | |
118 let expected_size = expected_size + PARENT_SIZE * 2; | |
119 | |
120 let mut packed = Vec::with_capacity(expected_size); | |
121 let mut new_dirstate_vec = vec![]; | |
122 | |
123 packed.extend(parents.p1); | |
124 packed.extend(parents.p2); | |
125 | |
126 for (ref filename, entry) in dirstate_vec { | |
127 let mut new_filename: Vec<u8> = filename.to_owned(); | |
128 let mut new_mtime: i32 = entry.mtime; | |
129 if entry.state == 'n' as i8 && entry.mtime == now.into() { | |
130 // The file was last modified "simultaneously" with the current | |
131 // write to dirstate (i.e. within the same second for file- | |
132 // systems with a granularity of 1 sec). This commonly happens | |
133 // for at least a couple of files on 'update'. | |
134 // The user could change the file without changing its size | |
135 // within the same second. Invalidate the file's mtime in | |
136 // dirstate, forcing future 'status' calls to compare the | |
137 // contents of the file if the size is the same. This prevents | |
138 // mistakenly treating such files as clean. | |
139 new_mtime = -1; | |
140 new_dirstate_vec.push(( | |
141 filename.to_owned(), | |
142 DirstateEntry { | |
143 mtime: new_mtime, | |
144 ..*entry | |
145 }, | |
146 )); | |
147 } | |
148 | |
149 if let Some(copy) = copymap.get(filename) { | |
150 new_filename.push('\0' as u8); | |
151 new_filename.extend(copy); | |
152 } | |
153 | |
154 packed.write_i8(entry.state)?; | |
155 packed.write_i32::<BigEndian>(entry.mode)?; | |
156 packed.write_i32::<BigEndian>(entry.size)?; | |
157 packed.write_i32::<BigEndian>(new_mtime)?; | |
158 packed.write_i32::<BigEndian>(new_filename.len() as i32)?; | |
159 packed.extend(new_filename) | |
160 } | |
161 | |
162 if packed.len() != expected_size { | |
163 return Err(DirstatePackError::BadSize(expected_size, packed.len())); | |
164 } | |
165 | |
166 Ok((packed, new_dirstate_vec)) | |
167 } | |
168 | |
169 #[cfg(test)] | |
170 mod tests { | |
171 use super::*; | |
172 | |
173 #[test] | |
174 fn test_pack_dirstate_empty() { | |
175 let dirstate_vec: DirstateVec = vec![]; | |
176 let copymap = HashMap::new(); | |
177 let parents = DirstateParents { | |
178 p1: b"12345678910111213141", | |
179 p2: b"00000000000000000000", | |
180 }; | |
181 let now: i32 = 15000000; | |
182 let expected = | |
183 (b"1234567891011121314100000000000000000000".to_vec(), vec![]); | |
184 | |
185 assert_eq!( | |
186 expected, | |
187 pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() | |
188 ); | |
189 } | |
190 #[test] | |
191 fn test_pack_dirstate_one_entry() { | |
192 let dirstate_vec: DirstateVec = vec![( | |
193 vec!['f' as u8, '1' as u8], | |
194 DirstateEntry { | |
195 state: 'n' as i8, | |
196 mode: 0o644, | |
197 size: 0, | |
198 mtime: 791231220, | |
199 }, | |
200 )]; | |
201 let copymap = HashMap::new(); | |
202 let parents = DirstateParents { | |
203 p1: b"12345678910111213141", | |
204 p2: b"00000000000000000000", | |
205 }; | |
206 let now: i32 = 15000000; | |
207 let expected = ( | |
208 [ | |
209 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, | |
210 49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, | |
211 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, | |
212 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49, | |
213 ] | |
214 .to_vec(), | |
215 vec![], | |
216 ); | |
217 | |
218 assert_eq!( | |
219 expected, | |
220 pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() | |
221 ); | |
222 } | |
223 #[test] | |
224 fn test_pack_dirstate_one_entry_with_copy() { | |
225 let dirstate_vec: DirstateVec = vec![( | |
226 b"f1".to_vec(), | |
227 DirstateEntry { | |
228 state: 'n' as i8, | |
229 mode: 0o644, | |
230 size: 0, | |
231 mtime: 791231220, | |
232 }, | |
233 )]; | |
234 let mut copymap = HashMap::new(); | |
235 copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
236 let parents = DirstateParents { | |
237 p1: b"12345678910111213141", | |
238 p2: b"00000000000000000000", | |
239 }; | |
240 let now: i32 = 15000000; | |
241 let expected = ( | |
242 [ | |
243 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, | |
244 49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, | |
245 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, | |
246 0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, | |
247 112, 121, 110, 97, 109, 101, | |
248 ] | |
249 .to_vec(), | |
250 vec![], | |
251 ); | |
252 | |
253 assert_eq!( | |
254 expected, | |
255 pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap() | |
256 ); | |
257 } | |
258 | |
259 #[test] | |
260 fn test_parse_pack_one_entry_with_copy() { | |
261 let dirstate_vec: DirstateVec = vec![( | |
262 b"f1".to_vec(), | |
263 DirstateEntry { | |
264 state: 'n' as i8, | |
265 mode: 0o644, | |
266 size: 0, | |
267 mtime: 791231220, | |
268 }, | |
269 )]; | |
270 let mut copymap = HashMap::new(); | |
271 copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
272 let parents = DirstateParents { | |
273 p1: b"12345678910111213141", | |
274 p2: b"00000000000000000000", | |
275 }; | |
276 let now: i32 = 15000000; | |
277 let result = | |
278 pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); | |
279 | |
280 assert_eq!( | |
281 ( | |
282 parents, | |
283 dirstate_vec, | |
284 copymap | |
285 .iter() | |
286 .map(|(k, v)| CopyVecEntry { | |
287 path: k.as_slice(), | |
288 copy_path: v.as_slice() | |
289 }) | |
290 .collect() | |
291 ), | |
292 parse_dirstate(result.0.as_slice()).unwrap() | |
293 ) | |
294 } | |
295 | |
296 #[test] | |
297 fn test_parse_pack_multiple_entries_with_copy() { | |
298 let dirstate_vec: DirstateVec = vec![ | |
299 ( | |
300 b"f1".to_vec(), | |
301 DirstateEntry { | |
302 state: 'n' as i8, | |
303 mode: 0o644, | |
304 size: 0, | |
305 mtime: 791231220, | |
306 }, | |
307 ), | |
308 ( | |
309 b"f2".to_vec(), | |
310 DirstateEntry { | |
311 state: 'm' as i8, | |
312 mode: 0o777, | |
313 size: 1000, | |
314 mtime: 791231220, | |
315 }, | |
316 ), | |
317 ( | |
318 b"f3".to_vec(), | |
319 DirstateEntry { | |
320 state: 'r' as i8, | |
321 mode: 0o644, | |
322 size: 234553, | |
323 mtime: 791231220, | |
324 }, | |
325 ), | |
326 ( | |
327 b"f4\xF6".to_vec(), | |
328 DirstateEntry { | |
329 state: 'a' as i8, | |
330 mode: 0o644, | |
331 size: -1, | |
332 mtime: -1, | |
333 }, | |
334 ), | |
335 ]; | |
336 let mut copymap = HashMap::new(); | |
337 copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
338 copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec()); | |
339 let parents = DirstateParents { | |
340 p1: b"12345678910111213141", | |
341 p2: b"00000000000000000000", | |
342 }; | |
343 let now: i32 = 15000000; | |
344 let result = | |
345 pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); | |
346 | |
347 assert_eq!( | |
348 (parents, dirstate_vec, copymap), | |
349 parse_dirstate(result.0.as_slice()) | |
350 .and_then(|(p, dvec, cvec)| Ok(( | |
351 p, | |
352 dvec, | |
353 cvec.iter() | |
354 .map(|entry| ( | |
355 entry.path.to_vec(), | |
356 entry.copy_path.to_vec() | |
357 )) | |
358 .collect() | |
359 ))) | |
360 .unwrap() | |
361 ) | |
362 } | |
363 | |
364 #[test] | |
365 /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 | |
366 fn test_parse_pack_one_entry_with_copy_and_time_conflict() { | |
367 let dirstate_vec: DirstateVec = vec![( | |
368 b"f1".to_vec(), | |
369 DirstateEntry { | |
370 state: 'n' as i8, | |
371 mode: 0o644, | |
372 size: 0, | |
373 mtime: 15000000, | |
374 }, | |
375 )]; | |
376 let mut copymap = HashMap::new(); | |
377 copymap.insert(b"f1".to_vec(), b"copyname".to_vec()); | |
378 let parents = DirstateParents { | |
379 p1: b"12345678910111213141", | |
380 p2: b"00000000000000000000", | |
381 }; | |
382 let now: i32 = 15000000; | |
383 let result = | |
384 pack_dirstate(&dirstate_vec, ©map, parents, now).unwrap(); | |
385 | |
386 assert_eq!( | |
387 ( | |
388 parents, | |
389 vec![( | |
390 b"f1".to_vec(), | |
391 DirstateEntry { | |
392 state: 'n' as i8, | |
393 mode: 0o644, | |
394 size: 0, | |
395 mtime: -1 | |
396 } | |
397 )], | |
398 copymap | |
399 .iter() | |
400 .map(|(k, v)| CopyVecEntry { | |
401 path: k.as_slice(), | |
402 copy_path: v.as_slice() | |
403 }) | |
404 .collect() | |
405 ), | |
406 parse_dirstate(result.0.as_slice()).unwrap() | |
407 ) | |
408 } | |
409 } |