Mercurial > hg-stable
view rust/hg-core/src/revlog/changelog.rs @ 51371:508fd40dc86a
branching: merge stable into default
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 12 Feb 2024 16:22:47 +0100 |
parents | 13f58ce70299 d626e5e7bbbe |
children | a96ed440450e |
line wrap: on
line source
use crate::errors::HgError; use crate::revlog::Revision; use crate::revlog::{Node, NodePrefix}; use crate::revlog::{Revlog, RevlogEntry, RevlogError}; use crate::utils::hg_path::HgPath; use crate::vfs::Vfs; use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision}; use itertools::{Either, Itertools}; use std::ascii::escape_default; use std::borrow::Cow; use std::fmt::{Debug, Formatter}; use std::iter; /// A specialized `Revlog` to work with changelog data format. pub struct Changelog { /// The generic `revlog` format. pub(crate) revlog: Revlog, } impl Changelog { /// Open the `changelog` of a repository given by its root. pub fn open( store_vfs: &Vfs, options: RevlogOpenOptions, ) -> Result<Self, HgError> { let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; Ok(Self { revlog }) } /// Return the `ChangelogRevisionData` for the given node ID. pub fn data_for_node( &self, node: NodePrefix, ) -> Result<ChangelogRevisionData, RevlogError> { let rev = self.revlog.rev_from_node(node)?; self.entry_for_checked_rev(rev)?.data() } /// Return the [`ChangelogEntry`] for the given revision number. pub fn entry_for_rev( &self, rev: UncheckedRevision, ) -> Result<ChangelogEntry, RevlogError> { let revlog_entry = self.revlog.get_entry(rev)?; Ok(ChangelogEntry { revlog_entry }) } /// Same as [`Self::entry_for_rev`] for checked revisions. fn entry_for_checked_rev( &self, rev: Revision, ) -> Result<ChangelogEntry, RevlogError> { let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; Ok(ChangelogEntry { revlog_entry }) } /// Return the [`ChangelogRevisionData`] for the given revision number. /// /// This is a useful shortcut in case the caller does not need the /// generic revlog information (parents, hashes etc). Otherwise /// consider taking a [`ChangelogEntry`] with /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. pub fn data_for_rev( &self, rev: UncheckedRevision, ) -> Result<ChangelogRevisionData, RevlogError> { self.entry_for_rev(rev)?.data() } pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { self.revlog.node_from_rev(rev) } pub fn rev_from_node( &self, node: NodePrefix, ) -> Result<Revision, RevlogError> { self.revlog.rev_from_node(node) } } impl Graph for Changelog { fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { self.revlog.parents(rev) } } /// A specialized `RevlogEntry` for `changelog` data format /// /// This is a `RevlogEntry` with the added semantics that the associated /// data should meet the requirements for `changelog`, materialized by /// the fact that `data()` constructs a `ChangelogRevisionData`. /// In case that promise would be broken, the `data` method returns an error. #[derive(Clone)] pub struct ChangelogEntry<'changelog> { /// Same data, as a generic `RevlogEntry`. pub(crate) revlog_entry: RevlogEntry<'changelog>, } impl<'changelog> ChangelogEntry<'changelog> { pub fn data<'a>( &'a self, ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> { let bytes = self.revlog_entry.data()?; if bytes.is_empty() { Ok(ChangelogRevisionData::null()) } else { Ok(ChangelogRevisionData::new(bytes).map_err(|err| { RevlogError::Other(HgError::CorruptedRepository(format!( "Invalid changelog data for revision {}: {:?}", self.revlog_entry.revision(), err ))) })?) } } /// Obtain a reference to the underlying `RevlogEntry`. /// /// This allows the caller to access the information that is common /// to all revlog entries: revision number, node id, parent revisions etc. pub fn as_revlog_entry(&self) -> &RevlogEntry { &self.revlog_entry } pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { Ok(self .revlog_entry .p1_entry()? .map(|revlog_entry| Self { revlog_entry })) } pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> { Ok(self .revlog_entry .p2_entry()? .map(|revlog_entry| Self { revlog_entry })) } } /// `Changelog` entry which knows how to interpret the `changelog` data bytes. #[derive(PartialEq)] pub struct ChangelogRevisionData<'changelog> { /// The data bytes of the `changelog` entry. bytes: Cow<'changelog, [u8]>, /// The end offset for the hex manifest (not including the newline) manifest_end: usize, /// The end offset for the user+email (not including the newline) user_end: usize, /// The end offset for the timestamp+timezone+extras (not including the /// newline) timestamp_end: usize, /// The end offset for the file list (not including the newline) files_end: usize, } impl<'changelog> ChangelogRevisionData<'changelog> { fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> { let mut line_iter = bytes.split(|b| b == &b'\n'); let manifest_end = line_iter .next() .expect("Empty iterator from split()?") .len(); let user_slice = line_iter.next().ok_or_else(|| { HgError::corrupted("Changeset data truncated after manifest line") })?; let user_end = manifest_end + 1 + user_slice.len(); let timestamp_slice = line_iter.next().ok_or_else(|| { HgError::corrupted("Changeset data truncated after user line") })?; let timestamp_end = user_end + 1 + timestamp_slice.len(); let mut files_end = timestamp_end + 1; loop { let line = line_iter.next().ok_or_else(|| { HgError::corrupted("Changeset data truncated in files list") })?; if line.is_empty() { if files_end == bytes.len() { // The list of files ended with a single newline (there // should be two) return Err(HgError::corrupted( "Changeset data truncated after files list", )); } files_end -= 1; break; } files_end += line.len() + 1; } Ok(Self { bytes, manifest_end, user_end, timestamp_end, files_end, }) } fn null() -> Self { Self::new(Cow::Borrowed( b"0000000000000000000000000000000000000000\n\n0 0\n\n", )) .unwrap() } /// Return an iterator over the lines of the entry. pub fn lines(&self) -> impl Iterator<Item = &[u8]> { self.bytes.split(|b| b == &b'\n') } /// Return the node id of the `manifest` referenced by this `changelog` /// entry. pub fn manifest_node(&self) -> Result<Node, HgError> { let manifest_node_hex = &self.bytes[..self.manifest_end]; Node::from_hex_for_repo(manifest_node_hex) } /// The full user string (usually a name followed by an email enclosed in /// angle brackets) pub fn user(&self) -> &[u8] { &self.bytes[self.manifest_end + 1..self.user_end] } /// The full timestamp line (timestamp in seconds, offset in seconds, and /// possibly extras) // TODO: We should expose this in a more useful way pub fn timestamp_line(&self) -> &[u8] { &self.bytes[self.user_end + 1..self.timestamp_end] } /// The files changed in this revision. pub fn files(&self) -> impl Iterator<Item = &HgPath> { if self.timestamp_end == self.files_end { Either::Left(iter::empty()) } else { Either::Right( self.bytes[self.timestamp_end + 1..self.files_end] .split(|b| b == &b'\n') .map(HgPath::new), ) } } /// The change description. pub fn description(&self) -> &[u8] { &self.bytes[self.files_end + 2..] } } impl Debug for ChangelogRevisionData<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("ChangelogRevisionData") .field("bytes", &debug_bytes(&self.bytes)) .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) .field( "user", &debug_bytes( &self.bytes[self.manifest_end + 1..self.user_end], ), ) .field( "timestamp", &debug_bytes( &self.bytes[self.user_end + 1..self.timestamp_end], ), ) .field( "files", &debug_bytes( &self.bytes[self.timestamp_end + 1..self.files_end], ), ) .field( "description", &debug_bytes(&self.bytes[self.files_end + 2..]), ) .finish() } } fn debug_bytes(bytes: &[u8]) -> String { String::from_utf8_lossy( &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(), ) .to_string() } #[cfg(test)] mod tests { use super::*; use crate::vfs::Vfs; use crate::NULL_REVISION; use pretty_assertions::assert_eq; #[test] fn test_create_changelogrevisiondata_invalid() { // Completely empty assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); // No newline after manifest assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err()); // No newline after user assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err()); // No newline after timestamp assert!( ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err() ); // Missing newline after files assert!(ChangelogRevisionData::new(Cow::Borrowed( b"abcd\n\n0 0\nfile1\nfile2" )) .is_err(),); // Only one newline after files assert!(ChangelogRevisionData::new(Cow::Borrowed( b"abcd\n\n0 0\nfile1\nfile2\n" )) .is_err(),); } #[test] fn test_create_changelogrevisiondata() { let data = ChangelogRevisionData::new(Cow::Borrowed( b"0123456789abcdef0123456789abcdef01234567 Some One <someone@example.com> 0 0 file1 file2 some commit message", )) .unwrap(); assert_eq!( data.manifest_node().unwrap(), Node::from_hex("0123456789abcdef0123456789abcdef01234567") .unwrap() ); assert_eq!(data.user(), b"Some One <someone@example.com>"); assert_eq!(data.timestamp_line(), b"0 0"); assert_eq!( data.files().collect_vec(), vec![HgPath::new("file1"), HgPath::new("file2")] ); assert_eq!(data.description(), b"some\ncommit\nmessage"); } #[test] fn test_data_from_rev_null() -> Result<(), RevlogError> { // an empty revlog will be enough for this case let temp = tempfile::tempdir().unwrap(); let vfs = Vfs { base: temp.path() }; std::fs::write(temp.path().join("foo.i"), b"").unwrap(); let revlog = Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new()) .unwrap(); let changelog = Changelog { revlog }; assert_eq!( changelog.data_for_rev(NULL_REVISION.into())?, ChangelogRevisionData::null() ); // same with the intermediate entry object assert_eq!( changelog.entry_for_rev(NULL_REVISION.into())?.data()?, ChangelogRevisionData::null() ); Ok(()) } #[test] fn test_empty_files_list() { assert!(ChangelogRevisionData::null() .files() .collect_vec() .is_empty()); } }