# HG changeset patch # User Georges Racinet # Date 1582049477 -3600 # Node ID 5ac1eecc9c6479e2dd7306e1faf091bad134466b # Parent 00d251d32007ef6b456ebf02c839d9599b6cd26d rust-nodemap: core implementation for shortest In this implementation, we just make `lookup()` return also the number of steps that have been needed to come to a conclusion from the nodetree data, and `validate_candidate()` takes care of the special cases related to `NULL_NODE`. This way of doing minimizes code duplication, but it means that the comparatively slower finding of first non zero nybble will run for all calls to `find()` where it is not needed. Still running on the file generated for the mozilla-central repository, it seems indeed that we now get more ofter 320 ns than 310. The odds that this could have a significant impact on real life Mercurial performance are still looking low. Let's wait for actual benchmark runs to see if an optimization is needed here. Differential Revision: https://phab.mercurial-scm.org/D7819 diff -r 00d251d32007 -r 5ac1eecc9c64 rust/hg-core/src/revlog/node.rs --- a/rust/hg-core/src/revlog/node.rs Tue Feb 18 19:11:16 2020 +0100 +++ b/rust/hg-core/src/revlog/node.rs Tue Feb 18 19:11:17 2020 +0100 @@ -226,6 +226,36 @@ assert!(i < self.len()); get_nybble(self.buf, i) } + + /// Return the index first nybble that's different from `node` + /// + /// If the return value is `None` that means that `self` is + /// a prefix of `node`, but the current method is a bit slower + /// than `is_prefix_of`. + /// + /// Returned index is as in `get_nybble`, i.e., starting at 0. + pub fn first_different_nybble(&self, node: &Node) -> Option { + let buf = self.buf; + let until = if self.is_odd { + buf.len() - 1 + } else { + buf.len() + }; + for i in 0..until { + if buf[i] != node.data[i] { + if buf[i] & 0xf0 == node.data[i] & 0xf0 { + return Some(2 * i + 1); + } else { + return Some(2 * i); + } + } + } + if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 { + Some(until * 2) + } else { + None + } + } } /// A shortcut for full `Node` references @@ -362,6 +392,36 @@ assert_eq!(prefix.borrow().get_nybble(7), 9); Ok(()) } + + #[test] + fn test_first_different_nybble_even_prefix() { + let prefix = NodePrefix::from_hex("12ca").unwrap(); + let prefref = prefix.borrow(); + let mut node = Node::from([0; NODE_BYTES_LENGTH]); + assert_eq!(prefref.first_different_nybble(&node), Some(0)); + node.data[0] = 0x13; + assert_eq!(prefref.first_different_nybble(&node), Some(1)); + node.data[0] = 0x12; + assert_eq!(prefref.first_different_nybble(&node), Some(2)); + node.data[1] = 0xca; + // now it is a prefix + assert_eq!(prefref.first_different_nybble(&node), None); + } + + #[test] + fn test_first_different_nybble_odd_prefix() { + let prefix = NodePrefix::from_hex("12c").unwrap(); + let prefref = prefix.borrow(); + let mut node = Node::from([0; NODE_BYTES_LENGTH]); + assert_eq!(prefref.first_different_nybble(&node), Some(0)); + node.data[0] = 0x13; + assert_eq!(prefref.first_different_nybble(&node), Some(1)); + node.data[0] = 0x12; + assert_eq!(prefref.first_different_nybble(&node), Some(2)); + node.data[1] = 0xca; + // now it is a prefix + assert_eq!(prefref.first_different_nybble(&node), None); + } } #[cfg(test)] diff -r 00d251d32007 -r 5ac1eecc9c64 rust/hg-core/src/revlog/nodemap.rs --- a/rust/hg-core/src/revlog/nodemap.rs Tue Feb 18 19:11:16 2020 +0100 +++ b/rust/hg-core/src/revlog/nodemap.rs Tue Feb 18 19:11:17 2020 +0100 @@ -17,6 +17,7 @@ RevlogIndex, NULL_REVISION, }; +use std::cmp::max; use std::fmt; use std::mem; use std::ops::Deref; @@ -98,6 +99,42 @@ ) -> Result, NodeMapError> { self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow()) } + + /// Give the size of the shortest node prefix that determines + /// the revision uniquely. + /// + /// From a binary node prefix, if it is matched in the node map, this + /// returns the number of hexadecimal digits that would had sufficed + /// to find the revision uniquely. + /// + /// Returns `None` if no `Revision` could be found for the prefix. + /// + /// If several Revisions match the given prefix, a [`MultipleResults`] + /// error is returned. + fn unique_prefix_len_bin<'a>( + &self, + idx: &impl RevlogIndex, + node_prefix: NodePrefixRef<'a>, + ) -> Result, NodeMapError>; + + /// Same as `unique_prefix_len_bin`, with the hexadecimal representation + /// of the prefix as input. + fn unique_prefix_len_hex( + &self, + idx: &impl RevlogIndex, + prefix: &str, + ) -> Result, NodeMapError> { + self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow()) + } + + /// Same as `unique_prefix_len_bin`, with a full `Node` as input + fn unique_prefix_len_node( + &self, + idx: &impl RevlogIndex, + node: &Node, + ) -> Result, NodeMapError> { + self.unique_prefix_len_bin(idx, node.into()) + } } pub trait MutableNodeMap: NodeMap { @@ -279,20 +316,24 @@ fn validate_candidate( idx: &impl RevlogIndex, prefix: NodePrefixRef, - rev: Option, -) -> Result, NodeMapError> { - if prefix.is_prefix_of(&NULL_NODE) { - // NULL_REVISION always matches a prefix made only of zeros + candidate: (Option, usize), +) -> Result<(Option, usize), NodeMapError> { + let (rev, steps) = candidate; + if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) { + rev.map_or(Ok((None, steps)), |r| { + has_prefix_or_none(idx, prefix, r) + .map(|opt| (opt, max(steps, nz_nybble + 1))) + }) + } else { + // the prefix is only made of zeros; NULL_REVISION always matches it // and any other *valid* result is an ambiguity match rev { - None => Ok(Some(NULL_REVISION)), + None => Ok((Some(NULL_REVISION), steps + 1)), Some(r) => match has_prefix_or_none(idx, prefix, r)? { - None => Ok(Some(NULL_REVISION)), + None => Ok((Some(NULL_REVISION), steps + 1)), _ => Err(NodeMapError::MultipleResults), }, } - } else { - rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r)) } } @@ -387,13 +428,26 @@ } /// Main working method for `NodeTree` searches - fn lookup<'p>( + /// + /// The first returned value is the result of analysing `NodeTree` data + /// *alone*: whereas `None` guarantees that the given prefix is absent + /// from the `NodeTree` data (but still could match `NULL_NODE`), with + /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision` + /// that could match the prefix. Actually, all that can be inferred from + /// the `NodeTree` data is that `rev` is the revision with the longest + /// common node prefix with the given prefix. + /// + /// The second returned value is the size of the smallest subprefix + /// of `prefix` that would give the same result, i.e. not the + /// `MultipleResults` error variant (again, using only the data of the + /// `NodeTree`). + fn lookup( &self, - prefix: NodePrefixRef<'p>, - ) -> Result, NodeMapError> { - for visit_item in self.visit(prefix) { + prefix: NodePrefixRef, + ) -> Result<(Option, usize), NodeMapError> { + for (i, visit_item) in self.visit(prefix).enumerate() { if let Some(opt) = visit_item.final_revision() { - return Ok(opt); + return Ok((opt, i + 1)); } } Err(NodeMapError::MultipleResults) @@ -638,6 +692,16 @@ prefix: NodePrefixRef<'a>, ) -> Result, NodeMapError> { validate_candidate(idx, prefix.clone(), self.lookup(prefix)?) + .map(|(opt, _shortest)| opt) + } + + fn unique_prefix_len_bin<'a>( + &self, + idx: &impl RevlogIndex, + prefix: NodePrefixRef<'a>, + ) -> Result, NodeMapError> { + validate_candidate(idx, prefix.clone(), self.lookup(prefix)?) + .map(|(opt, shortest)| opt.map(|_rev| shortest)) } } @@ -772,6 +836,7 @@ assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9))); assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults)); assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0))); + assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3))); assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION))); } @@ -791,8 +856,10 @@ }; assert_eq!(nt.find_hex(&idx, "10")?, Some(1)); assert_eq!(nt.find_hex(&idx, "c")?, Some(2)); + assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1)); assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults)); assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION)); + assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3)); assert_eq!(nt.find_hex(&idx, "01")?, Some(9)); Ok(()) } @@ -828,6 +895,13 @@ self.nt.find_hex(&self.index, prefix) } + fn unique_prefix_len_hex( + &self, + prefix: &str, + ) -> Result, NodeMapError> { + self.nt.unique_prefix_len_hex(&self.index, prefix) + } + /// Drain `added` and restart a new one fn commit(self) -> Self { let mut as_vec: Vec = @@ -880,6 +954,28 @@ } #[test] + fn test_unique_prefix_len_zero_prefix() { + let mut idx = TestNtIndex::new(); + idx.insert(0, "00000abcd").unwrap(); + + assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults)); + // in the nodetree proper, this will be found at the first nybble + // yet the correct answer for unique_prefix_len is not 1, nor 1+1, + // but the first difference with `NULL_NODE` + assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); + assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); + + // same with odd result + idx.insert(1, "00123").unwrap(); + assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3))); + assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3))); + + // these are unchanged of course + assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); + assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); + } + + #[test] fn test_insert_extreme_splitting() -> Result<(), NodeMapError> { // check that the splitting loop is long enough let mut nt_idx = TestNtIndex::new();