comparison rust/hg-core/src/revlog/nodemap.rs @ 44385:a98ba6983a63

rust-nodemap: input/output primitives These allow initiating a `NodeTree` from an immutable opaque sequence of bytes, which could be passed over from Python (extracted from a `PyBuffer`) or directly mmapped from a file. Conversely, we can consume a `NodeTree`, extracting the bytes that express what has been added to the immutable part, together with the original immutable part. This gives callers the choice to start a new `NodeTree`. After writing to disk, some would prefer to reread for best guarantees (very cheap if mmapping), while others will find it more convenient to grow the memory that was considered immutable in the `NodeTree` and continue from there. This is enough to build examples running on real data and to start gathering performance hints. Differential Revision: https://phab.mercurial-scm.org/D7796
author Georges Racinet <georges.racinet@octobus.net>
date Tue, 18 Feb 2020 19:11:15 +0100
parents d2da8667125b
children 00d251d32007
comparison
equal deleted inserted replaced
44384:6689cebacb32 44385:a98ba6983a63
15 use super::{ 15 use super::{
16 Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex, 16 Node, NodeError, NodePrefix, NodePrefixRef, Revision, RevlogIndex,
17 }; 17 };
18 18
19 use std::fmt; 19 use std::fmt;
20 use std::mem;
20 use std::ops::Deref; 21 use std::ops::Deref;
21 use std::ops::Index; 22 use std::ops::Index;
23 use std::slice;
22 24
23 #[derive(Debug, PartialEq)] 25 #[derive(Debug, PartialEq)]
24 pub enum NodeMapError { 26 pub enum NodeMapError {
25 MultipleResults, 27 MultipleResults,
26 InvalidNodePrefix(NodeError), 28 InvalidNodePrefix(NodeError),
170 /// 172 ///
171 /// Another related difference is that `NULL_REVISION` (-1) is not 173 /// Another related difference is that `NULL_REVISION` (-1) is not
172 /// represented at all, because we want an immutable empty nodetree 174 /// represented at all, because we want an immutable empty nodetree
173 /// to be valid. 175 /// to be valid.
174 176
/// One block of the radix tree: 16 slots of `BLOCK_SIZE / 16` bytes each,
/// stored as raw big-endian bytes so a block can live directly in an
/// mmapped file.
#[derive(Copy, Clone)]
pub struct Block([u8; BLOCK_SIZE]);

/// Not derivable for arrays of length >32 until const generics are stable
impl PartialEq for Block {
    fn eq(&self, other: &Self) -> bool {
        // compare as slices, since `[u8; 64]` has no derived `PartialEq`
        self.0[..] == other.0[..]
    }
}

pub const BLOCK_SIZE: usize = 64;
177 188
178 impl Block { 189 impl Block {
179 fn new() -> Self { 190 fn new() -> Self {
180 Block([-1; 16]) 191 // -1 in 2's complement to create an absent node
192 let byte: u8 = 255;
193 Block([byte; BLOCK_SIZE])
181 } 194 }
182 195
183 fn get(&self, nybble: u8) -> Element { 196 fn get(&self, nybble: u8) -> Element {
184 Element::from(RawElement::from_be(self.0[nybble as usize])) 197 let index = nybble as usize * mem::size_of::<RawElement>();
198 Element::from(RawElement::from_be_bytes([
199 self.0[index],
200 self.0[index + 1],
201 self.0[index + 2],
202 self.0[index + 3],
203 ]))
185 } 204 }
186 205
187 fn set(&mut self, nybble: u8, element: Element) { 206 fn set(&mut self, nybble: u8, element: Element) {
188 self.0[nybble as usize] = RawElement::to_be(element.into()) 207 let values = RawElement::to_be_bytes(element.into());
208 let index = nybble as usize * mem::size_of::<RawElement>();
209 self.0[index] = values[0];
210 self.0[index + 1] = values[1];
211 self.0[index + 2] = values[2];
212 self.0[index + 3] = values[3];
189 } 213 }
190 } 214 }
191 215
192 impl fmt::Debug for Block { 216 impl fmt::Debug for Block {
193 /// sparse representation for testing and debugging purposes 217 /// sparse representation for testing and debugging purposes
228 } 252 }
229 } 253 }
230 } 254 }
231 255
232 /// Return `None` unless the `Node` for `rev` has given prefix in `index`. 256 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
233 fn has_prefix_or_none<'p>( 257 fn has_prefix_or_none(
234 idx: &impl RevlogIndex, 258 idx: &impl RevlogIndex,
235 prefix: NodePrefixRef<'p>, 259 prefix: NodePrefixRef,
236 rev: Revision, 260 rev: Revision,
237 ) -> Result<Option<Revision>, NodeMapError> { 261 ) -> Result<Option<Revision>, NodeMapError> {
238 idx.node(rev) 262 idx.node(rev)
239 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev)) 263 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
240 .map(|node| { 264 .map(|node| {
258 NodeTree { 282 NodeTree {
259 readonly: readonly, 283 readonly: readonly,
260 growable: Vec::new(), 284 growable: Vec::new(),
261 root: root, 285 root: root,
262 } 286 }
287 }
288
289 /// Create from an opaque bunch of bytes
290 ///
291 /// The created `NodeTreeBytes` from `buffer`,
292 /// of which exactly `amount` bytes are used.
293 ///
294 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
295 /// - `offset` allows for the final file format to include fixed data
296 /// (generation number, behavioural flags)
297 /// - `amount` is expressed in bytes, and is not automatically derived from
298 /// `bytes`, so that a caller that manages them atomically can perform
299 /// temporary disk serializations and still rollback easily if needed.
300 /// First use-case for this would be to support Mercurial shell hooks.
301 ///
302 /// panics if `buffer` is smaller than `amount`
303 pub fn load_bytes(
304 bytes: Box<dyn Deref<Target = [u8]> + Send>,
305 amount: usize,
306 ) -> Self {
307 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
308 }
309
310 /// Retrieve added `Block` and the original immutable data
311 pub fn into_readonly_and_added(
312 self,
313 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
314 let mut vec = self.growable;
315 let readonly = self.readonly;
316 if readonly.last() != Some(&self.root) {
317 vec.push(self.root);
318 }
319 (readonly, vec)
320 }
321
322 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
323 /// storage
324 pub fn into_readonly_and_added_bytes(
325 self,
326 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
327 let (readonly, vec) = self.into_readonly_and_added();
328 // Prevent running `v`'s destructor so we are in complete control
329 // of the allocation.
330 let vec = mem::ManuallyDrop::new(vec);
331
332 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
333 // bytes, so this is perfectly safe.
334 let bytes = unsafe {
335 // Assert that `Block` hasn't been changed and has no padding
336 let _: [u8; 4 * BLOCK_SIZE] =
337 std::mem::transmute([Block::new(); 4]);
338
339 // /!\ Any use of `vec` after this is use-after-free.
340 // TODO: use `into_raw_parts` once stabilized
341 Vec::from_raw_parts(
342 vec.as_ptr() as *mut u8,
343 vec.len() * BLOCK_SIZE,
344 vec.capacity() * BLOCK_SIZE,
345 )
346 };
347 (readonly, bytes)
263 } 348 }
264 349
265 /// Total number of blocks 350 /// Total number of blocks
266 fn len(&self) -> usize { 351 fn len(&self) -> usize {
267 self.readonly.len() + self.growable.len() + 1 352 self.readonly.len() + self.growable.len() + 1
408 } 493 }
409 Ok(()) 494 Ok(())
410 } 495 }
411 } 496 }
412 497
/// Readonly `Block` storage for a `NodeTree`, backed by an opaque byte
/// buffer (e.g. a memory-mapped file, or bytes handed over from Python).
pub struct NodeTreeBytes {
    /// Underlying raw bytes; only the first `len_in_blocks * BLOCK_SIZE`
    /// bytes are interpreted as blocks
    buffer: Box<dyn Deref<Target = [u8]> + Send>,
    /// Number of whole `Block`s encoded in `buffer`
    len_in_blocks: usize,
}
502
503 impl NodeTreeBytes {
504 fn new(
505 buffer: Box<dyn Deref<Target = [u8]> + Send>,
506 amount: usize,
507 ) -> Self {
508 assert!(buffer.len() >= amount);
509 let len_in_blocks = amount / BLOCK_SIZE;
510 NodeTreeBytes {
511 buffer,
512 len_in_blocks,
513 }
514 }
515 }
516
impl Deref for NodeTreeBytes {
    type Target = [Block];

    /// View the buffer's prefix as a slice of `Block`s.
    fn deref(&self) -> &[Block] {
        // SAFETY: `new()` asserted that the buffer holds at least
        // `amount >= len_in_blocks * BLOCK_SIZE` bytes, and `Block` is a
        // plain `[u8; BLOCK_SIZE]` (alignment 1, no padding), so
        // reinterpreting that prefix as `len_in_blocks` blocks is sound.
        unsafe {
            slice::from_raw_parts(
                (&self.buffer).as_ptr() as *const Block,
                self.len_in_blocks,
            )
        }
    }
}
529
413 struct NodeTreeVisitor<'n, 'p> { 530 struct NodeTreeVisitor<'n, 'p> {
414 nt: &'n NodeTree, 531 nt: &'n NodeTree,
415 prefix: NodePrefixRef<'p>, 532 prefix: NodePrefixRef<'p>,
416 visit: usize, 533 visit: usize,
417 nybble_idx: usize, 534 nybble_idx: usize,
537 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); 654 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
538 } 655 }
539 656
540 #[test] 657 #[test]
541 fn test_raw_block() { 658 fn test_raw_block() {
542 let mut raw = [-1; 16]; 659 let mut raw = [255u8; 64];
543 raw[0] = 0; 660
544 raw[1] = RawElement::to_be(15); 661 let mut counter = 0;
545 raw[2] = RawElement::to_be(-2); 662 for val in [0, 15, -2, -1, -3].iter() {
546 raw[3] = RawElement::to_be(-1); 663 for byte in RawElement::to_be_bytes(*val).iter() {
547 raw[4] = RawElement::to_be(-3); 664 raw[counter] = *byte;
665 counter += 1;
666 }
667 }
548 let block = Block(raw); 668 let block = Block(raw);
549 assert_eq!(block.get(0), Element::Block(0)); 669 assert_eq!(block.get(0), Element::Block(0));
550 assert_eq!(block.get(1), Element::Block(15)); 670 assert_eq!(block.get(1), Element::Block(15));
551 assert_eq!(block.get(3), Element::None); 671 assert_eq!(block.get(3), Element::None);
552 assert_eq!(block.get(2), Element::Rev(0)); 672 assert_eq!(block.get(2), Element::Rev(0));
785 assert_eq!(idx.find_hex("c1")?, None); 905 assert_eq!(idx.find_hex("c1")?, None);
786 assert_eq!(idx.find_hex("1234")?, Some(0)); 906 assert_eq!(idx.find_hex("1234")?, Some(0));
787 907
788 Ok(()) 908 Ok(())
789 } 909 }
790 } 910
911 #[test]
912 fn test_into_added_empty() {
913 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
914 assert!(sample_nodetree()
915 .into_readonly_and_added_bytes()
916 .1
917 .is_empty());
918 }
919
920 #[test]
921 fn test_into_added_bytes() -> Result<(), NodeMapError> {
922 let mut idx = TestNtIndex::new();
923 idx.insert(0, "1234")?;
924 let mut idx = idx.commit();
925 idx.insert(4, "cafe")?;
926 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
927
928 // only the root block has been changed
929 assert_eq!(bytes.len(), BLOCK_SIZE);
930 // big endian for -2
931 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
932 // big endian for -6
933 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
934 Ok(())
935 }
936 }