rust/hg-core/src/revlog/changelog.rs
changeset 49064 95da3e99cbd8
parent 49063 cc132255261b
child 49065 5d205e476057
equal deleted inserted replaced
49063:cc132255261b 49064:95da3e99cbd8
     1 use crate::errors::HgError;
     1 use crate::errors::HgError;
     2 use crate::repo::Repo;
     2 use crate::repo::Repo;
     3 use crate::revlog::revlog::{Revlog, RevlogError};
     3 use crate::revlog::revlog::{Revlog, RevlogError};
     4 use crate::revlog::Revision;
     4 use crate::revlog::Revision;
     5 use crate::revlog::{Node, NodePrefix};
     5 use crate::revlog::{Node, NodePrefix};
       
     6 use crate::utils::hg_path::HgPath;
       
     7 use itertools::Itertools;
       
     8 use std::ascii::escape_default;
       
     9 use std::fmt::{Debug, Formatter};
     6 
    10 
     7 /// A specialized `Revlog` to work with `changelog` data format.
    11 /// A specialized `Revlog` to work with `changelog` data format.
     8 pub struct Changelog {
    12 pub struct Changelog {
     9     /// The generic `revlog` format.
    13     /// The generic `revlog` format.
    10     pub(crate) revlog: Revlog,
    14     pub(crate) revlog: Revlog,
    33     ) -> Result<ChangelogRevisionData, RevlogError> {
    37     ) -> Result<ChangelogRevisionData, RevlogError> {
    34         let bytes = self.revlog.get_rev_data(rev)?.into_owned();
    38         let bytes = self.revlog.get_rev_data(rev)?.into_owned();
    35         if bytes.is_empty() {
    39         if bytes.is_empty() {
    36             Ok(ChangelogRevisionData::null())
    40             Ok(ChangelogRevisionData::null())
    37         } else {
    41         } else {
    38             Ok(ChangelogRevisionData::new(bytes))
    42             Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
       
    43                 RevlogError::Other(HgError::CorruptedRepository(format!(
       
    44                     "Invalid changelog data for revision {}: {:?}",
       
    45                     rev, err
       
    46                 )))
       
    47             })?)
    39         }
    48         }
    40     }
    49     }
    41 
    50 
    /// Look up the `Node` for revision `rev` by delegating to the
    /// underlying generic revlog.
    // NOTE(review): presumably `None` means `rev` is not known to the
    // revlog -- confirm against `Revlog::node_from_rev`.
    pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
        self.revlog.node_from_rev(rev)
    }
    45 }
    54 }
    46 
    55 
/// `Changelog` entry which knows how to interpret the `changelog` data bytes.
///
/// The entry is kept as one byte buffer; the `*_end` fields are byte offsets
/// into that buffer, computed once by `new()`, so that each section can be
/// returned as a sub-slice without scanning for newlines again.
#[derive(PartialEq)]
pub struct ChangelogRevisionData {
    /// The data bytes of the `changelog` entry.
    bytes: Vec<u8>,
    /// The end offset for the hex manifest (not including the newline).
    /// This is the length of the first line of `bytes`.
    manifest_end: usize,
    /// The end offset for the user+email (not including the newline)
    user_end: usize,
    /// The end offset for the timestamp+timezone+extras (not including the
    /// newline)
    timestamp_end: usize,
    /// The end offset for the file list (not including the newline).
    /// When the entry lists no files this is equal to `timestamp_end`.
    files_end: usize,
}
    53 
    71 
    54 impl ChangelogRevisionData {
    72 impl ChangelogRevisionData {
    55     fn new(bytes: Vec<u8>) -> Self {
    73     fn new(bytes: Vec<u8>) -> Result<Self, HgError> {
    56         Self { bytes }
    74         let mut line_iter = bytes.split(|b| b == &b'\n');
       
    75         let manifest_end = line_iter
       
    76             .next()
       
    77             .expect("Empty iterator from split()?")
       
    78             .len();
       
    79         let user_slice = line_iter.next().ok_or_else(|| {
       
    80             HgError::corrupted("Changeset data truncated after manifest line")
       
    81         })?;
       
    82         let user_end = manifest_end + 1 + user_slice.len();
       
    83         let timestamp_slice = line_iter.next().ok_or_else(|| {
       
    84             HgError::corrupted("Changeset data truncated after user line")
       
    85         })?;
       
    86         let timestamp_end = user_end + 1 + timestamp_slice.len();
       
    87         let mut files_end = timestamp_end + 1;
       
    88         loop {
       
    89             let line = line_iter.next().ok_or_else(|| {
       
    90                 HgError::corrupted("Changeset data truncated in files list")
       
    91             })?;
       
    92             if line.is_empty() {
       
    93                 if files_end == bytes.len() {
       
    94                     // The list of files ended with a single newline (there
       
    95                     // should be two)
       
    96                     return Err(HgError::corrupted(
       
    97                         "Changeset data truncated after files list",
       
    98                     ));
       
    99                 }
       
   100                 files_end -= 1;
       
   101                 break;
       
   102             }
       
   103             files_end += line.len() + 1;
       
   104         }
       
   105 
       
   106         Ok(Self {
       
   107             bytes,
       
   108             manifest_end,
       
   109             user_end,
       
   110             timestamp_end,
       
   111             files_end,
       
   112         })
    57     }
   113     }
    58 
   114 
    59     fn null() -> Self {
   115     fn null() -> Self {
    60         Self::new(
   116         Self::new(
    61             b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
   117             b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
    62         )
   118         )
       
   119         .unwrap()
    63     }
   120     }
    64 
   121 
    65     /// Return an iterator over the lines of the entry.
   122     /// Return an iterator over the lines of the entry.
    66     pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
   123     pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
    67         self.bytes.split(|b| b == &b'\n')
   124         self.bytes.split(|b| b == &b'\n')
    68     }
   125     }
    69 
   126 
    70     /// Return the node id of the `manifest` referenced by this `changelog`
   127     /// Return the node id of the `manifest` referenced by this `changelog`
    71     /// entry.
   128     /// entry.
    72     pub fn manifest_node(&self) -> Result<Node, HgError> {
   129     pub fn manifest_node(&self) -> Result<Node, HgError> {
    73         let manifest_node_hex =
   130         let manifest_node_hex = &self.bytes[..self.manifest_end];
    74             self.lines().next().expect("Empty iterator from split()?");
       
    75         Node::from_hex_for_repo(manifest_node_hex)
   131         Node::from_hex_for_repo(manifest_node_hex)
    76     }
   132     }
    77 }
   133 
       
   134     /// The full user string (usually a name followed by an email enclosed in
       
   135     /// angle brackets)
       
   136     pub fn user(&self) -> &[u8] {
       
   137         &self.bytes[self.manifest_end + 1..self.user_end]
       
   138     }
       
   139 
       
   140     /// The full timestamp line (timestamp in seconds, offset in seconds, and
       
   141     /// possibly extras)
       
   142     // TODO: We should expose this in a more useful way
       
   143     pub fn timestamp_line(&self) -> &[u8] {
       
   144         &self.bytes[self.user_end + 1..self.timestamp_end]
       
   145     }
       
   146 
       
   147     /// The files changed in this revision.
       
   148     pub fn files(&self) -> impl Iterator<Item = &HgPath> {
       
   149         self.bytes[self.timestamp_end + 1..self.files_end]
       
   150             .split(|b| b == &b'\n')
       
   151             .map(|path| HgPath::new(path))
       
   152     }
       
   153 
       
   154     /// The change description.
       
   155     pub fn description(&self) -> &[u8] {
       
   156         &self.bytes[self.files_end + 2..]
       
   157     }
       
   158 }
       
   159 
       
   160 impl Debug for ChangelogRevisionData {
       
   161     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
       
   162         f.debug_struct("ChangelogRevisionData")
       
   163             .field("bytes", &debug_bytes(&self.bytes))
       
   164             .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
       
   165             .field(
       
   166                 "user",
       
   167                 &debug_bytes(
       
   168                     &self.bytes[self.manifest_end + 1..self.user_end],
       
   169                 ),
       
   170             )
       
   171             .field(
       
   172                 "timestamp",
       
   173                 &debug_bytes(
       
   174                     &self.bytes[self.user_end + 1..self.timestamp_end],
       
   175                 ),
       
   176             )
       
   177             .field(
       
   178                 "files",
       
   179                 &debug_bytes(
       
   180                     &self.bytes[self.timestamp_end + 1..self.files_end],
       
   181                 ),
       
   182             )
       
   183             .field(
       
   184                 "description",
       
   185                 &debug_bytes(&self.bytes[self.files_end + 2..]),
       
   186             )
       
   187             .finish()
       
   188     }
       
   189 }
       
   190 
       
   191 fn debug_bytes(bytes: &[u8]) -> String {
       
   192     String::from_utf8_lossy(
       
   193         &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
       
   194     )
       
   195     .to_string()
       
   196 }
       
   197 
       
   198 #[cfg(test)]
       
   199 mod tests {
       
   200     use super::*;
       
   201     use itertools::Itertools;
       
   202     use pretty_assertions::assert_eq;
       
   203 
       
   204     #[test]
       
   205     fn test_create_changelogrevisiondata_invalid() {
       
   206         // Completely empty
       
   207         assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
       
   208         // No newline after manifest
       
   209         assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err());
       
   210         // No newline after user
       
   211         assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err());
       
   212         // No newline after timestamp
       
   213         assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err());
       
   214         // Missing newline after files
       
   215         assert!(ChangelogRevisionData::new(
       
   216             b"abcd\n\n0 0\nfile1\nfile2".to_vec()
       
   217         )
       
   218         .is_err(),);
       
   219         // Only one newline after files
       
   220         assert!(ChangelogRevisionData::new(
       
   221             b"abcd\n\n0 0\nfile1\nfile2\n".to_vec()
       
   222         )
       
   223         .is_err(),);
       
   224     }
       
   225 
       
   226     #[test]
       
   227     fn test_create_changelogrevisiondata() {
       
   228         let data = ChangelogRevisionData::new(
       
   229             b"0123456789abcdef0123456789abcdef01234567
       
   230 Some One <someone@example.com>
       
   231 0 0
       
   232 file1
       
   233 file2
       
   234 
       
   235 some
       
   236 commit
       
   237 message"
       
   238                 .to_vec(),
       
   239         )
       
   240         .unwrap();
       
   241         assert_eq!(
       
   242             data.manifest_node().unwrap(),
       
   243             Node::from_hex("0123456789abcdef0123456789abcdef01234567")
       
   244                 .unwrap()
       
   245         );
       
   246         assert_eq!(data.user(), b"Some One <someone@example.com>");
       
   247         assert_eq!(data.timestamp_line(), b"0 0");
       
   248         assert_eq!(
       
   249             data.files().collect_vec(),
       
   250             vec![HgPath::new("file1"), HgPath::new("file2")]
       
   251         );
       
   252         assert_eq!(data.description(), b"some\ncommit\nmessage");
       
   253     }
       
   254 }