Mercurial > hg
comparison rust/hg-core/src/revlog/revlog.rs @ 45601:900b9b79b99c
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)
Avoid rebuilding `Index` every time it is needed; doing so was a bad idea anyway.
Once `Index::new` returns a `Result`, this will avoid things like `Revlog::len`
having to return `Result<usize>` instead of `usize`.
[X] make `Index` owner of its bytes
[ ] make `Index::new` return an error if `offset != bytes.len()`
Differential Revision: https://phab.mercurial-scm.org/D9106
author | Antoine cezar <acezar@chwitlabs.fr> |
---|---|
date | Mon, 28 Sep 2020 15:13:51 +0200 |
parents | 497657895b54 |
children | 1cef583541c0 |
comparison
equal
deleted
inserted
replaced
45600:b68b19104d16 | 45601:900b9b79b99c |
---|---|
34 /// Read only implementation of revlog. | 34 /// Read only implementation of revlog. |
35 pub struct Revlog { | 35 pub struct Revlog { |
36 /// When index and data are not interleaved: bytes of the revlog index. | 36 /// When index and data are not interleaved: bytes of the revlog index. |
37 /// When index and data are interleaved: bytes of the revlog index and | 37 /// When index and data are interleaved: bytes of the revlog index and |
38 /// data. | 38 /// data. |
39 index_bytes: Box<dyn Deref<Target = [u8]> + Send>, | 39 index: Index, |
40 /// When index and data are not interleaved: bytes of the revlog data | 40 /// When index and data are not interleaved: bytes of the revlog data |
41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, | 41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, |
42 } | 42 } |
43 | 43 |
44 impl Revlog { | 44 impl Revlog { |
54 let version = get_version(&index_mmap); | 54 let version = get_version(&index_mmap); |
55 if version != 1 { | 55 if version != 1 { |
56 return Err(RevlogError::UnsuportedVersion(version)); | 56 return Err(RevlogError::UnsuportedVersion(version)); |
57 } | 57 } |
58 | 58 |
59 let is_inline = is_inline(&index_mmap); | 59 let index = Index::new(Box::new(index_mmap)); |
60 | |
61 let index_bytes = Box::new(index_mmap); | |
62 | 60 |
63 // TODO load data only when needed // | 61 // TODO load data only when needed // |
64 // type annotation required | 62 // type annotation required |
65 // won't recognize Mmap as Deref<Target = [u8]> | 63 // won't recognize Mmap as Deref<Target = [u8]> |
66 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = | 64 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = |
67 if is_inline { | 65 if index.is_inline() { |
68 None | 66 None |
69 } else { | 67 } else { |
70 let data_path = index_path.with_extension("d"); | 68 let data_path = index_path.with_extension("d"); |
71 let data_mmap = | 69 let data_mmap = |
72 mmap_open(&data_path).map_err(RevlogError::IoError)?; | 70 mmap_open(&data_path).map_err(RevlogError::IoError)?; |
73 Some(Box::new(data_mmap)) | 71 Some(Box::new(data_mmap)) |
74 }; | 72 }; |
75 | 73 |
76 Ok(Revlog { | 74 Ok(Revlog { index, data_bytes }) |
77 index_bytes, | |
78 data_bytes, | |
79 }) | |
80 } | 75 } |
81 | 76 |
82 /// Return number of entries of the `Revlog`. | 77 /// Return number of entries of the `Revlog`. |
83 pub fn len(&self) -> usize { | 78 pub fn len(&self) -> usize { |
84 self.index().len() | 79 self.index.len() |
85 } | 80 } |
86 | 81 |
87 /// Returns `true` if the `Revlog` has zero `entries`. | 82 /// Returns `true` if the `Revlog` has zero `entries`. |
88 pub fn is_empty(&self) -> bool { | 83 pub fn is_empty(&self) -> bool { |
89 self.index().is_empty() | 84 self.index.is_empty() |
90 } | 85 } |
91 | 86 |
92 /// Return the full data associated to a node. | 87 /// Return the full data associated to a node. |
93 #[timed] | 88 #[timed] |
94 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> { | 89 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> { |
95 let index = self.index(); | |
96 // This is brute force. But it is fast enough for now. | 90 // This is brute force. But it is fast enough for now. |
97 // Optimization will come later. | 91 // Optimization will come later. |
98 for rev in (0..self.len() as Revision).rev() { | 92 for rev in (0..self.len() as Revision).rev() { |
99 let index_entry = | 93 let index_entry = |
100 index.get_entry(rev).ok_or(RevlogError::Corrupted)?; | 94 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?; |
101 if node == index_entry.hash() { | 95 if node == index_entry.hash() { |
102 return Ok(rev); | 96 return Ok(rev); |
103 } | 97 } |
104 } | 98 } |
105 Err(RevlogError::InvalidRevision) | 99 Err(RevlogError::InvalidRevision) |
121 .get_entry(base_rev) | 115 .get_entry(base_rev) |
122 .map_err(|_| RevlogError::Corrupted)?; | 116 .map_err(|_| RevlogError::Corrupted)?; |
123 } | 117 } |
124 | 118 |
125 // TODO do not look twice in the index | 119 // TODO do not look twice in the index |
126 let index = self.index(); | 120 let index_entry = self |
127 let index_entry = | 121 .index |
128 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?; | 122 .get_entry(rev) |
123 .ok_or(RevlogError::InvalidRevision)?; | |
129 | 124 |
130 let data: Vec<u8> = if delta_chain.is_empty() { | 125 let data: Vec<u8> = if delta_chain.is_empty() { |
131 entry.data()?.into() | 126 entry.data()?.into() |
132 } else { | 127 } else { |
133 Revlog::build_data_from_deltas(entry, &delta_chain)? | 128 Revlog::build_data_from_deltas(entry, &delta_chain)? |
151 p1: Revision, | 146 p1: Revision, |
152 p2: Revision, | 147 p2: Revision, |
153 expected: &[u8], | 148 expected: &[u8], |
154 data: &[u8], | 149 data: &[u8], |
155 ) -> bool { | 150 ) -> bool { |
156 let index = self.index(); | 151 let e1 = self.index.get_entry(p1); |
157 let e1 = index.get_entry(p1); | |
158 let h1 = match e1 { | 152 let h1 = match e1 { |
159 Some(ref entry) => entry.hash(), | 153 Some(ref entry) => entry.hash(), |
160 None => &NULL_NODE_ID, | 154 None => &NULL_NODE_ID, |
161 }; | 155 }; |
162 let e2 = index.get_entry(p2); | 156 let e2 = self.index.get_entry(p2); |
163 let h2 = match e2 { | 157 let h2 = match e2 { |
164 Some(ref entry) => entry.hash(), | 158 Some(ref entry) => entry.hash(), |
165 None => &NULL_NODE_ID, | 159 None => &NULL_NODE_ID, |
166 }; | 160 }; |
167 | 161 |
185 deltas.iter().map(|d| patch::PatchList::new(d)).collect(); | 179 deltas.iter().map(|d| patch::PatchList::new(d)).collect(); |
186 let patch = patch::fold_patch_lists(&patches); | 180 let patch = patch::fold_patch_lists(&patches); |
187 Ok(patch.apply(&snapshot)) | 181 Ok(patch.apply(&snapshot)) |
188 } | 182 } |
189 | 183 |
190 /// Return the revlog index. | |
191 pub fn index(&self) -> Index { | |
192 let is_inline = self.data_bytes.is_none(); | |
193 Index::new(&self.index_bytes, is_inline) | |
194 } | |
195 | |
196 /// Return the revlog data. | 184 /// Return the revlog data. |
197 fn data(&self) -> &[u8] { | 185 fn data(&self) -> &[u8] { |
198 match self.data_bytes { | 186 match self.data_bytes { |
199 Some(ref data_bytes) => &data_bytes, | 187 Some(ref data_bytes) => &data_bytes, |
200 None => &self.index_bytes, | 188 None => panic!( |
189 "forgot to load the data or trying to access inline data" | |
190 ), | |
201 } | 191 } |
202 } | 192 } |
203 | 193 |
204 /// Get an entry of the revlog. | 194 /// Get an entry of the revlog. |
205 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { | 195 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { |
206 let index = self.index(); | 196 let index_entry = self |
207 let index_entry = | 197 .index |
208 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?; | 198 .get_entry(rev) |
199 .ok_or(RevlogError::InvalidRevision)?; | |
209 let start = index_entry.offset(); | 200 let start = index_entry.offset(); |
210 let end = start + index_entry.compressed_len(); | 201 let end = start + index_entry.compressed_len(); |
202 let data = if self.index.is_inline() { | |
203 self.index.data(start, end) | |
204 } else { | |
205 &self.data()[start..end] | |
206 }; | |
211 let entry = RevlogEntry { | 207 let entry = RevlogEntry { |
212 rev, | 208 rev, |
213 bytes: &self.data()[start..end], | 209 bytes: data, |
214 compressed_len: index_entry.compressed_len(), | 210 compressed_len: index_entry.compressed_len(), |
215 uncompressed_len: index_entry.uncompressed_len(), | 211 uncompressed_len: index_entry.uncompressed_len(), |
216 base_rev: if index_entry.base_revision() == rev { | 212 base_rev: if index_entry.base_revision() == rev { |
217 None | 213 None |
218 } else { | 214 } else { |
294 fn is_delta(&self) -> bool { | 290 fn is_delta(&self) -> bool { |
295 self.base_rev.is_some() | 291 self.base_rev.is_some() |
296 } | 292 } |
297 } | 293 } |
298 | 294 |
299 /// Value of the inline flag. | |
300 pub fn is_inline(index_bytes: &[u8]) -> bool { | |
301 match &index_bytes[0..=1] { | |
302 [0, 0] | [0, 2] => false, | |
303 _ => true, | |
304 } | |
305 } | |
306 | |
307 /// Format version of the revlog. | 295 /// Format version of the revlog. |
308 pub fn get_version(index_bytes: &[u8]) -> u16 { | 296 pub fn get_version(index_bytes: &[u8]) -> u16 { |
309 BigEndian::read_u16(&index_bytes[2..=3]) | 297 BigEndian::read_u16(&index_bytes[2..=3]) |
310 } | 298 } |
311 | 299 |
330 mod tests { | 318 mod tests { |
331 use super::*; | 319 use super::*; |
332 | 320 |
333 use super::super::index::IndexEntryBuilder; | 321 use super::super::index::IndexEntryBuilder; |
334 | 322 |
335 #[cfg(test)] | |
336 pub struct RevlogBuilder { | |
337 version: u16, | |
338 is_general_delta: bool, | |
339 is_inline: bool, | |
340 offset: usize, | |
341 index: Vec<Vec<u8>>, | |
342 data: Vec<Vec<u8>>, | |
343 } | |
344 | |
345 #[cfg(test)] | |
346 impl RevlogBuilder { | |
347 pub fn new() -> Self { | |
348 Self { | |
349 version: 2, | |
350 is_inline: false, | |
351 is_general_delta: true, | |
352 offset: 0, | |
353 index: vec![], | |
354 data: vec![], | |
355 } | |
356 } | |
357 | |
358 pub fn with_inline(&mut self, value: bool) -> &mut Self { | |
359 self.is_inline = value; | |
360 self | |
361 } | |
362 | |
363 pub fn with_general_delta(&mut self, value: bool) -> &mut Self { | |
364 self.is_general_delta = value; | |
365 self | |
366 } | |
367 | |
368 pub fn with_version(&mut self, value: u16) -> &mut Self { | |
369 self.version = value; | |
370 self | |
371 } | |
372 | |
373 pub fn push( | |
374 &mut self, | |
375 mut index: IndexEntryBuilder, | |
376 data: Vec<u8>, | |
377 ) -> &mut Self { | |
378 if self.index.is_empty() { | |
379 index.is_first(true); | |
380 index.with_general_delta(self.is_general_delta); | |
381 index.with_inline(self.is_inline); | |
382 index.with_version(self.version); | |
383 } else { | |
384 index.with_offset(self.offset); | |
385 } | |
386 self.index.push(index.build()); | |
387 self.offset += data.len(); | |
388 self.data.push(data); | |
389 self | |
390 } | |
391 | |
392 pub fn build_inline(&self) -> Vec<u8> { | |
393 let mut bytes = | |
394 Vec::with_capacity(self.index.len() + self.data.len()); | |
395 for (index, data) in self.index.iter().zip(self.data.iter()) { | |
396 bytes.extend(index); | |
397 bytes.extend(data); | |
398 } | |
399 bytes | |
400 } | |
401 } | |
402 | |
403 #[test] | |
404 fn is_not_inline_when_no_inline_flag_test() { | |
405 let bytes = RevlogBuilder::new() | |
406 .with_general_delta(false) | |
407 .with_inline(false) | |
408 .push(IndexEntryBuilder::new(), vec![]) | |
409 .build_inline(); | |
410 | |
411 assert_eq!(is_inline(&bytes), false) | |
412 } | |
413 | |
414 #[test] | |
415 fn is_inline_when_inline_flag_test() { | |
416 let bytes = RevlogBuilder::new() | |
417 .with_general_delta(false) | |
418 .with_inline(true) | |
419 .push(IndexEntryBuilder::new(), vec![]) | |
420 .build_inline(); | |
421 | |
422 assert_eq!(is_inline(&bytes), true) | |
423 } | |
424 | |
425 #[test] | |
426 fn is_inline_when_inline_and_generaldelta_flags_test() { | |
427 let bytes = RevlogBuilder::new() | |
428 .with_general_delta(true) | |
429 .with_inline(true) | |
430 .push(IndexEntryBuilder::new(), vec![]) | |
431 .build_inline(); | |
432 | |
433 assert_eq!(is_inline(&bytes), true) | |
434 } | |
435 | |
436 #[test] | 323 #[test] |
437 fn version_test() { | 324 fn version_test() { |
438 let bytes = RevlogBuilder::new() | 325 let bytes = IndexEntryBuilder::new() |
326 .is_first(true) | |
439 .with_version(1) | 327 .with_version(1) |
440 .push(IndexEntryBuilder::new(), vec![]) | 328 .build(); |
441 .build_inline(); | |
442 | 329 |
443 assert_eq!(get_version(&bytes), 1) | 330 assert_eq!(get_version(&bytes), 1) |
444 } | 331 } |
445 } | 332 } |