changeset 47961:4d2a5ca060e3

rust: Add a Filelog struct that wraps Revlog. Some filelog-specific logic is moved from the `rhg cat` code into this struct, where it can be reused more easily. Additionally, a missing end delimiter for metadata now causes an error to be returned instead of being silently ignored. Differential Revision: https://phab.mercurial-scm.org/D11408
author Simon Sapin <simon.sapin@octobus.net>
date Mon, 13 Sep 2021 15:42:39 +0200
parents cfb6e6699b25
children 8c29af0f6d6e
files rust/hg-core/src/operations/cat.rs rust/hg-core/src/repo.rs rust/hg-core/src/revlog.rs rust/hg-core/src/revlog/filelog.rs rust/hg-core/src/utils.rs
diffstat 5 files changed, 100 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/operations/cat.rs	Mon Sep 13 13:45:10 2021 +0200
+++ b/rust/hg-core/src/operations/cat.rs	Mon Sep 13 15:42:39 2021 +0200
@@ -5,15 +5,11 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 
-use std::path::PathBuf;
-
 use crate::repo::Repo;
-use crate::revlog::path_encode::path_encode;
-use crate::revlog::revlog::Revlog;
 use crate::revlog::revlog::RevlogError;
 use crate::revlog::Node;
-use crate::utils::files::get_path_from_bytes;
-use crate::utils::hg_path::{HgPath, HgPathBuf};
+
+use crate::utils::hg_path::HgPathBuf;
 
 pub struct CatOutput {
     /// Whether any file in the manifest matched the paths given as CLI
@@ -27,8 +23,6 @@
     pub node: Node,
 }
 
-const METADATA_DELIMITER: [u8; 2] = [b'\x01', b'\n'];
-
 /// Output the given revision of files
 ///
 /// * `root`: Repository root
@@ -54,26 +48,10 @@
             if cat_file.as_bytes() == manifest_file.as_bytes() {
                 *is_matched = true;
                 found_any = true;
-                let index_path = store_path(manifest_file, b".i");
-                let data_path = store_path(manifest_file, b".d");
-
-                let file_log =
-                    Revlog::open(repo, &index_path, Some(&data_path))?;
+                let file_log = repo.filelog(manifest_file)?;
                 let file_node = Node::from_hex_for_repo(node_bytes)?;
-                let file_rev = file_log.get_node_rev(file_node.into())?;
-                let data = file_log.get_rev_data(file_rev)?;
-                if data.starts_with(&METADATA_DELIMITER) {
-                    let end_delimiter_position = data
-                        [METADATA_DELIMITER.len()..]
-                        .windows(METADATA_DELIMITER.len())
-                        .position(|bytes| bytes == METADATA_DELIMITER);
-                    if let Some(position) = end_delimiter_position {
-                        let offset = METADATA_DELIMITER.len() * 2;
-                        bytes.extend(data[position + offset..].iter());
-                    }
-                } else {
-                    bytes.extend(data);
-                }
+                let entry = file_log.get_node(file_node)?;
+                bytes.extend(entry.data()?)
             }
         }
     }
@@ -91,9 +69,3 @@
         node,
     })
 }
-
-fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
-    let encoded_bytes =
-        path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
-    get_path_from_bytes(&encoded_bytes).into()
-}
--- a/rust/hg-core/src/repo.rs	Mon Sep 13 13:45:10 2021 +0200
+++ b/rust/hg-core/src/repo.rs	Mon Sep 13 15:42:39 2021 +0200
@@ -7,8 +7,10 @@
 use crate::errors::HgResultExt;
 use crate::manifest::{Manifest, Manifestlog};
 use crate::requirements;
+use crate::revlog::filelog::Filelog;
 use crate::revlog::revlog::RevlogError;
 use crate::utils::files::get_path_from_bytes;
+use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::{is_dir, is_file, Vfs};
 use crate::{exit_codes, Node};
@@ -346,6 +348,10 @@
             Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
         manifest.get_node(manifest_node.into())
     }
+
+    pub fn filelog(&self, path: &HgPath) -> Result<Filelog, RevlogError> {
+        Filelog::open(self, path)
+    }
 }
 
 /// Lazily-initialized component of `Repo` with interior mutability
--- a/rust/hg-core/src/revlog.rs	Mon Sep 13 13:45:10 2021 +0200
+++ b/rust/hg-core/src/revlog.rs	Mon Sep 13 15:42:39 2021 +0200
@@ -11,6 +11,7 @@
 pub mod path_encode;
 pub use node::{FromHexError, Node, NodePrefix};
 pub mod changelog;
+pub mod filelog;
 pub mod index;
 pub mod manifest;
 pub mod patch;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/filelog.rs	Mon Sep 13 15:42:39 2021 +0200
@@ -0,0 +1,79 @@
+use crate::errors::HgError;
+use crate::repo::Repo;
+use crate::revlog::path_encode::path_encode;
+use crate::revlog::revlog::{Revlog, RevlogError};
+use crate::revlog::NodePrefix;
+use crate::revlog::Revision;
+use crate::utils::files::get_path_from_bytes;
+use crate::utils::hg_path::HgPath;
+use crate::utils::SliceExt;
+use std::borrow::Cow;
+use std::path::PathBuf;
+
+/// A specialized `Revlog` to work with file data logs.
+pub struct Filelog {
+    /// The generic `revlog` format.
+    revlog: Revlog,
+}
+
+impl Filelog {
+    pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, RevlogError> {
+        let index_path = store_path(file_path, b".i");
+        let data_path = store_path(file_path, b".d");
+        let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
+        Ok(Self { revlog })
+    }
+
+    /// The given node ID is that of the file as found in a manifest, not of a
+    /// changeset.
+    pub fn get_node(
+        &self,
+        file_node: impl Into<NodePrefix>,
+    ) -> Result<FilelogEntry, RevlogError> {
+        let file_rev = self.revlog.get_node_rev(file_node.into())?;
+        self.get_rev(file_rev)
+    }
+
+    /// The given revision is that of the file as found in a manifest, not of a
+    /// changeset.
+    pub fn get_rev(
+        &self,
+        file_rev: Revision,
+    ) -> Result<FilelogEntry, RevlogError> {
+        let data = self.revlog.get_rev_data(file_rev)?;
+        Ok(FilelogEntry(data.into()))
+    }
+}
+
+fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
+    let encoded_bytes =
+        path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
+    get_path_from_bytes(&encoded_bytes).into()
+}
+
+pub struct FilelogEntry<'filelog>(Cow<'filelog, [u8]>);
+
+impl<'filelog> FilelogEntry<'filelog> {
+    /// Split into metadata and data
+    pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
+        const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
+
+        if let Some(rest) = self.0.drop_prefix(DELIMITER) {
+            if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
+                Ok((Some(metadata), data))
+            } else {
+                Err(HgError::corrupted(
+                    "Missing metadata end delimiter in filelog entry",
+                ))
+            }
+        } else {
+            Ok((None, &self.0))
+        }
+    }
+
+    /// Returns the file contents at this revision, stripped of any metadata
+    pub fn data(&self) -> Result<&[u8], HgError> {
+        let (_metadata, data) = self.split()?;
+        Ok(data)
+    }
+}
--- a/rust/hg-core/src/utils.rs	Mon Sep 13 13:45:10 2021 +0200
+++ b/rust/hg-core/src/utils.rs	Mon Sep 13 15:42:39 2021 +0200
@@ -74,6 +74,7 @@
     fn trim(&self) -> &Self;
     fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
     fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
+    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
 }
 
 impl SliceExt for [u8] {
@@ -134,6 +135,14 @@
         let b = iter.next()?;
         Some((a, b))
     }
+
+    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
+        if let Some(pos) = find_slice_in_slice(self, separator) {
+            Some((&self[..pos], &self[pos + separator.len()..]))
+        } else {
+            None
+        }
+    }
 }
 
 pub trait Escaped {