rust-revlog: fix incorrect results with NULL_NODE prefixes stable
authorGeorges Racinet <georges.racinet@octobus.net>
Thu, 30 Mar 2023 11:34:30 +0200
branchstable
changeset 50744 bca4037306da
parent 50743 0159b014f3ab
child 50745 3338c6ffdaa3
rust-revlog: fix incorrect results with NULL_NODE prefixes In case a short hash is a prefix of `NULL_NODE`, the correct revision number lookup is `NULL_REVISION` only if there is no match in the nodemap. Indeed, if there is a single nodemap match, then it is an ambiguity with the always matching `NULL_NODE`. Before this change, using the Mercurial development repository as a testbed (it has public changesets with node ID starting with `0005` and `0009`), this is what `rhg` did (plain `hg` provided for reference) ``` $ rust/target/debug/rhg cat -r 000 README README: no such file in rev 000000000000 $ hg cat -r 000 README abort: ambiguous revision identifier: 000 ``` Here is the expected output for `rhg` on ambiguous prefixes (again, before this change): ``` $ rust/target/debug/rhg cat -r 0001 README abort: ambiguous revision identifier: 0001 ``` The test provided by 8c29af0f6d6e in `test-rhg.t` could become flaky with this change, unless all hashes are fixed. We expect reviewers to be more sure about that than we are.
rust/hg-core/src/revlog/mod.rs
--- a/rust/hg-core/src/revlog/mod.rs	Thu Mar 30 10:29:29 2023 +0200
+++ b/rust/hg-core/src/revlog/mod.rs	Thu Mar 30 11:34:30 2023 +0200
@@ -225,16 +225,23 @@
         &self,
         node: NodePrefix,
     ) -> Result<Revision, RevlogError> {
-        if node.is_prefix_of(&NULL_NODE) {
-            return Ok(NULL_REVISION);
-        }
+        let looked_up = if let Some(nodemap) = &self.nodemap {
+            nodemap
+                .find_bin(&self.index, node)?
+                .ok_or(RevlogError::InvalidRevision)
+        } else {
+            self.rev_from_node_no_persistent_nodemap(node)
+        };
 
-        if let Some(nodemap) = &self.nodemap {
-            return nodemap
-                .find_bin(&self.index, node)?
-                .ok_or(RevlogError::InvalidRevision);
-        }
-        self.rev_from_node_no_persistent_nodemap(node)
+        if node.is_prefix_of(&NULL_NODE) {
+            return match looked_up {
+                Ok(_) => Err(RevlogError::AmbiguousPrefix),
+                Err(RevlogError::InvalidRevision) => Ok(NULL_REVISION),
+                res => res,
+            };
+        };
+
+        looked_up
     }
 
     /// Same as `rev_from_node`, without using a persistent nodemap
@@ -677,6 +684,7 @@
         assert_eq!(revlog.len(), 0);
         assert!(revlog.get_entry(0).is_err());
         assert!(!revlog.has_rev(0));
+        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
     }
 
     #[test]
@@ -748,4 +756,65 @@
         assert!(p2_entry.is_some());
         assert_eq!(p2_entry.unwrap().revision(), 1);
     }
+
+    #[test]
+    fn test_nodemap() {
+        let temp = tempfile::tempdir().unwrap();
+        let vfs = Vfs { base: temp.path() };
+
+        // building a revlog with a forced Node starting with zeros
+        // This is a corruption, but it does not preclude using the nodemap
+        // if we don't try and access the data
+        let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
+            .unwrap();
+        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
+            .unwrap();
+        let entry0_bytes = IndexEntryBuilder::new()
+            .is_first(true)
+            .with_version(1)
+            .with_inline(true)
+            .with_offset(INDEX_ENTRY_SIZE)
+            .with_node(node0)
+            .build();
+        let entry1_bytes = IndexEntryBuilder::new()
+            .with_offset(INDEX_ENTRY_SIZE)
+            .with_node(node1)
+            .build();
+        let contents = vec![entry0_bytes, entry1_bytes]
+            .into_iter()
+            .flatten()
+            .collect_vec();
+        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
+        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
+
+        // accessing the data shows the corruption
+        revlog.get_entry(0).unwrap().data().unwrap_err();
+
+        assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1);
+        assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
+        assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), 1);
+        assert_eq!(
+            revlog
+                .rev_from_node(NodePrefix::from_hex("000").unwrap())
+                .unwrap(),
+            -1
+        );
+        assert_eq!(
+            revlog
+                .rev_from_node(NodePrefix::from_hex("b00").unwrap())
+                .unwrap(),
+            1
+        );
+        // RevlogError does not implement PartialEq
+        // (ultimately because io::Error does not)
+        match revlog
+            .rev_from_node(NodePrefix::from_hex("00").unwrap())
+            .expect_err("Expected to give AmbiguousPrefix error")
+        {
+            RevlogError::AmbiguousPrefix => (),
+            e => {
+                panic!("Got another error than AmbiguousPrefix: {:?}", e);
+            }
+        };
+    }
 }