comparison rust/hg-core/src/revlog/revlog.rs @ 45601:900b9b79b99c

hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2) Prevent building an `Index` every time it is needed; that was a bad idea anyway. Once `Index::new` returns a `Result`, building the index only once will avoid things like `Revlog::len` returning `Result<usize>` instead of `usize`. [X] make `Index` owner of its bytes [ ] make `Index::new` return an error if `offset != bytes.len()` Differential Revision: https://phab.mercurial-scm.org/D9106
author Antoine cezar <acezar@chwitlabs.fr>
date Mon, 28 Sep 2020 15:13:51 +0200
parents 497657895b54
children 1cef583541c0
comparison
equal deleted inserted replaced
45600:b68b19104d16 45601:900b9b79b99c
34 /// Read only implementation of revlog. 34 /// Read only implementation of revlog.
35 pub struct Revlog { 35 pub struct Revlog {
36 /// When index and data are not interleaved: bytes of the revlog index. 36 /// When index and data are not interleaved: bytes of the revlog index.
37 /// When index and data are interleaved: bytes of the revlog index and 37 /// When index and data are interleaved: bytes of the revlog index and
38 /// data. 38 /// data.
39 index_bytes: Box<dyn Deref<Target = [u8]> + Send>, 39 index: Index,
40 /// When index and data are not interleaved: bytes of the revlog data 40 /// When index and data are not interleaved: bytes of the revlog data
41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, 41 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
42 } 42 }
43 43
44 impl Revlog { 44 impl Revlog {
54 let version = get_version(&index_mmap); 54 let version = get_version(&index_mmap);
55 if version != 1 { 55 if version != 1 {
56 return Err(RevlogError::UnsuportedVersion(version)); 56 return Err(RevlogError::UnsuportedVersion(version));
57 } 57 }
58 58
59 let is_inline = is_inline(&index_mmap); 59 let index = Index::new(Box::new(index_mmap));
60
61 let index_bytes = Box::new(index_mmap);
62 60
63 // TODO load data only when needed // 61 // TODO load data only when needed //
64 // type annotation required 62 // type annotation required
65 // won't recognize Mmap as Deref<Target = [u8]> 63 // won't recognize Mmap as Deref<Target = [u8]>
66 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = 64 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
67 if is_inline { 65 if index.is_inline() {
68 None 66 None
69 } else { 67 } else {
70 let data_path = index_path.with_extension("d"); 68 let data_path = index_path.with_extension("d");
71 let data_mmap = 69 let data_mmap =
72 mmap_open(&data_path).map_err(RevlogError::IoError)?; 70 mmap_open(&data_path).map_err(RevlogError::IoError)?;
73 Some(Box::new(data_mmap)) 71 Some(Box::new(data_mmap))
74 }; 72 };
75 73
76 Ok(Revlog { 74 Ok(Revlog { index, data_bytes })
77 index_bytes,
78 data_bytes,
79 })
80 } 75 }
81 76
82 /// Return number of entries of the `Revlog`. 77 /// Return number of entries of the `Revlog`.
83 pub fn len(&self) -> usize { 78 pub fn len(&self) -> usize {
84 self.index().len() 79 self.index.len()
85 } 80 }
86 81
87 /// Returns `true` if the `Revlog` has zero `entries`. 82 /// Returns `true` if the `Revlog` has zero `entries`.
88 pub fn is_empty(&self) -> bool { 83 pub fn is_empty(&self) -> bool {
89 self.index().is_empty() 84 self.index.is_empty()
90 } 85 }
91 86
92 /// Return the full data associated to a node. 87 /// Return the full data associated to a node.
93 #[timed] 88 #[timed]
94 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> { 89 pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
95 let index = self.index();
96 // This is brute force. But it is fast enough for now. 90 // This is brute force. But it is fast enough for now.
97 // Optimization will come later. 91 // Optimization will come later.
98 for rev in (0..self.len() as Revision).rev() { 92 for rev in (0..self.len() as Revision).rev() {
99 let index_entry = 93 let index_entry =
100 index.get_entry(rev).ok_or(RevlogError::Corrupted)?; 94 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
101 if node == index_entry.hash() { 95 if node == index_entry.hash() {
102 return Ok(rev); 96 return Ok(rev);
103 } 97 }
104 } 98 }
105 Err(RevlogError::InvalidRevision) 99 Err(RevlogError::InvalidRevision)
121 .get_entry(base_rev) 115 .get_entry(base_rev)
122 .map_err(|_| RevlogError::Corrupted)?; 116 .map_err(|_| RevlogError::Corrupted)?;
123 } 117 }
124 118
125 // TODO do not look twice in the index 119 // TODO do not look twice in the index
126 let index = self.index(); 120 let index_entry = self
127 let index_entry = 121 .index
128 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?; 122 .get_entry(rev)
123 .ok_or(RevlogError::InvalidRevision)?;
129 124
130 let data: Vec<u8> = if delta_chain.is_empty() { 125 let data: Vec<u8> = if delta_chain.is_empty() {
131 entry.data()?.into() 126 entry.data()?.into()
132 } else { 127 } else {
133 Revlog::build_data_from_deltas(entry, &delta_chain)? 128 Revlog::build_data_from_deltas(entry, &delta_chain)?
151 p1: Revision, 146 p1: Revision,
152 p2: Revision, 147 p2: Revision,
153 expected: &[u8], 148 expected: &[u8],
154 data: &[u8], 149 data: &[u8],
155 ) -> bool { 150 ) -> bool {
156 let index = self.index(); 151 let e1 = self.index.get_entry(p1);
157 let e1 = index.get_entry(p1);
158 let h1 = match e1 { 152 let h1 = match e1 {
159 Some(ref entry) => entry.hash(), 153 Some(ref entry) => entry.hash(),
160 None => &NULL_NODE_ID, 154 None => &NULL_NODE_ID,
161 }; 155 };
162 let e2 = index.get_entry(p2); 156 let e2 = self.index.get_entry(p2);
163 let h2 = match e2 { 157 let h2 = match e2 {
164 Some(ref entry) => entry.hash(), 158 Some(ref entry) => entry.hash(),
165 None => &NULL_NODE_ID, 159 None => &NULL_NODE_ID,
166 }; 160 };
167 161
185 deltas.iter().map(|d| patch::PatchList::new(d)).collect(); 179 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
186 let patch = patch::fold_patch_lists(&patches); 180 let patch = patch::fold_patch_lists(&patches);
187 Ok(patch.apply(&snapshot)) 181 Ok(patch.apply(&snapshot))
188 } 182 }
189 183
190 /// Return the revlog index.
191 pub fn index(&self) -> Index {
192 let is_inline = self.data_bytes.is_none();
193 Index::new(&self.index_bytes, is_inline)
194 }
195
196 /// Return the revlog data. 184 /// Return the revlog data.
197 fn data(&self) -> &[u8] { 185 fn data(&self) -> &[u8] {
198 match self.data_bytes { 186 match self.data_bytes {
199 Some(ref data_bytes) => &data_bytes, 187 Some(ref data_bytes) => &data_bytes,
200 None => &self.index_bytes, 188 None => panic!(
189 "forgot to load the data or trying to access inline data"
190 ),
201 } 191 }
202 } 192 }
203 193
204 /// Get an entry of the revlog. 194 /// Get an entry of the revlog.
205 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { 195 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
206 let index = self.index(); 196 let index_entry = self
207 let index_entry = 197 .index
208 index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?; 198 .get_entry(rev)
199 .ok_or(RevlogError::InvalidRevision)?;
209 let start = index_entry.offset(); 200 let start = index_entry.offset();
210 let end = start + index_entry.compressed_len(); 201 let end = start + index_entry.compressed_len();
202 let data = if self.index.is_inline() {
203 self.index.data(start, end)
204 } else {
205 &self.data()[start..end]
206 };
211 let entry = RevlogEntry { 207 let entry = RevlogEntry {
212 rev, 208 rev,
213 bytes: &self.data()[start..end], 209 bytes: data,
214 compressed_len: index_entry.compressed_len(), 210 compressed_len: index_entry.compressed_len(),
215 uncompressed_len: index_entry.uncompressed_len(), 211 uncompressed_len: index_entry.uncompressed_len(),
216 base_rev: if index_entry.base_revision() == rev { 212 base_rev: if index_entry.base_revision() == rev {
217 None 213 None
218 } else { 214 } else {
294 fn is_delta(&self) -> bool { 290 fn is_delta(&self) -> bool {
295 self.base_rev.is_some() 291 self.base_rev.is_some()
296 } 292 }
297 } 293 }
298 294
299 /// Value of the inline flag.
300 pub fn is_inline(index_bytes: &[u8]) -> bool {
301 match &index_bytes[0..=1] {
302 [0, 0] | [0, 2] => false,
303 _ => true,
304 }
305 }
306
307 /// Format version of the revlog. 295 /// Format version of the revlog.
308 pub fn get_version(index_bytes: &[u8]) -> u16 { 296 pub fn get_version(index_bytes: &[u8]) -> u16 {
309 BigEndian::read_u16(&index_bytes[2..=3]) 297 BigEndian::read_u16(&index_bytes[2..=3])
310 } 298 }
311 299
330 mod tests { 318 mod tests {
331 use super::*; 319 use super::*;
332 320
333 use super::super::index::IndexEntryBuilder; 321 use super::super::index::IndexEntryBuilder;
334 322
335 #[cfg(test)]
336 pub struct RevlogBuilder {
337 version: u16,
338 is_general_delta: bool,
339 is_inline: bool,
340 offset: usize,
341 index: Vec<Vec<u8>>,
342 data: Vec<Vec<u8>>,
343 }
344
345 #[cfg(test)]
346 impl RevlogBuilder {
347 pub fn new() -> Self {
348 Self {
349 version: 2,
350 is_inline: false,
351 is_general_delta: true,
352 offset: 0,
353 index: vec![],
354 data: vec![],
355 }
356 }
357
358 pub fn with_inline(&mut self, value: bool) -> &mut Self {
359 self.is_inline = value;
360 self
361 }
362
363 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
364 self.is_general_delta = value;
365 self
366 }
367
368 pub fn with_version(&mut self, value: u16) -> &mut Self {
369 self.version = value;
370 self
371 }
372
373 pub fn push(
374 &mut self,
375 mut index: IndexEntryBuilder,
376 data: Vec<u8>,
377 ) -> &mut Self {
378 if self.index.is_empty() {
379 index.is_first(true);
380 index.with_general_delta(self.is_general_delta);
381 index.with_inline(self.is_inline);
382 index.with_version(self.version);
383 } else {
384 index.with_offset(self.offset);
385 }
386 self.index.push(index.build());
387 self.offset += data.len();
388 self.data.push(data);
389 self
390 }
391
392 pub fn build_inline(&self) -> Vec<u8> {
393 let mut bytes =
394 Vec::with_capacity(self.index.len() + self.data.len());
395 for (index, data) in self.index.iter().zip(self.data.iter()) {
396 bytes.extend(index);
397 bytes.extend(data);
398 }
399 bytes
400 }
401 }
402
403 #[test]
404 fn is_not_inline_when_no_inline_flag_test() {
405 let bytes = RevlogBuilder::new()
406 .with_general_delta(false)
407 .with_inline(false)
408 .push(IndexEntryBuilder::new(), vec![])
409 .build_inline();
410
411 assert_eq!(is_inline(&bytes), false)
412 }
413
414 #[test]
415 fn is_inline_when_inline_flag_test() {
416 let bytes = RevlogBuilder::new()
417 .with_general_delta(false)
418 .with_inline(true)
419 .push(IndexEntryBuilder::new(), vec![])
420 .build_inline();
421
422 assert_eq!(is_inline(&bytes), true)
423 }
424
425 #[test]
426 fn is_inline_when_inline_and_generaldelta_flags_test() {
427 let bytes = RevlogBuilder::new()
428 .with_general_delta(true)
429 .with_inline(true)
430 .push(IndexEntryBuilder::new(), vec![])
431 .build_inline();
432
433 assert_eq!(is_inline(&bytes), true)
434 }
435
436 #[test] 323 #[test]
437 fn version_test() { 324 fn version_test() {
438 let bytes = RevlogBuilder::new() 325 let bytes = IndexEntryBuilder::new()
326 .is_first(true)
439 .with_version(1) 327 .with_version(1)
440 .push(IndexEntryBuilder::new(), vec![]) 328 .build();
441 .build_inline();
442 329
443 assert_eq!(get_version(&bytes), 1) 330 assert_eq!(get_version(&bytes), 1)
444 } 331 }
445 } 332 }