comparison rust/hg-core/src/revlog/changelog.rs @ 49096:07ec9f4f24bf

changelog: avoid copying changeset data into `ChangelogRevisionData`

Differential Revision: https://phab.mercurial-scm.org/D12548
author Martin von Zweigbergk <martinvonz@google.com>
date Tue, 12 Apr 2022 23:15:25 -0700
parents a5ef50becea8
children e98fd81bb151
comparison
equal deleted inserted replaced
49095:b97835b2e2d4 49096:07ec9f4f24bf
4 use crate::revlog::{Node, NodePrefix}; 4 use crate::revlog::{Node, NodePrefix};
5 use crate::utils::hg_path::HgPath; 5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs; 6 use crate::vfs::Vfs;
7 use itertools::Itertools; 7 use itertools::Itertools;
8 use std::ascii::escape_default; 8 use std::ascii::escape_default;
9 use std::borrow::Cow;
9 use std::fmt::{Debug, Formatter}; 10 use std::fmt::{Debug, Formatter};
10 11
11 /// A specialized `Revlog` to work with `changelog` data format. 12 /// A specialized `Revlog` to work with `changelog` data format.
12 pub struct Changelog { 13 pub struct Changelog {
13 /// The generic `revlog` format. 14 /// The generic `revlog` format.
42 /// Return the `ChangelogRevisionData` of the given revision number. 43 /// Return the `ChangelogRevisionData` of the given revision number.
43 pub fn data_for_rev( 44 pub fn data_for_rev(
44 &self, 45 &self,
45 rev: Revision, 46 rev: Revision,
46 ) -> Result<ChangelogRevisionData, RevlogError> { 47 ) -> Result<ChangelogRevisionData, RevlogError> {
47 let bytes = self.revlog.get_rev_data(rev)?.into_owned(); 48 let bytes = self.revlog.get_rev_data(rev)?;
48 if bytes.is_empty() { 49 if bytes.is_empty() {
49 Ok(ChangelogRevisionData::null()) 50 Ok(ChangelogRevisionData::null())
50 } else { 51 } else {
51 Ok(ChangelogRevisionData::new(bytes).map_err(|err| { 52 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
52 RevlogError::Other(HgError::CorruptedRepository(format!( 53 RevlogError::Other(HgError::CorruptedRepository(format!(
69 } 70 }
70 } 71 }
71 72
72 /// `Changelog` entry which knows how to interpret the `changelog` data bytes. 73 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
73 #[derive(PartialEq)] 74 #[derive(PartialEq)]
74 pub struct ChangelogRevisionData { 75 pub struct ChangelogRevisionData<'changelog> {
75 /// The data bytes of the `changelog` entry. 76 /// The data bytes of the `changelog` entry.
76 bytes: Vec<u8>, 77 bytes: Cow<'changelog, [u8]>,
77 /// The end offset for the hex manifest (not including the newline) 78 /// The end offset for the hex manifest (not including the newline)
78 manifest_end: usize, 79 manifest_end: usize,
79 /// The end offset for the user+email (not including the newline) 80 /// The end offset for the user+email (not including the newline)
80 user_end: usize, 81 user_end: usize,
81 /// The end offset for the timestamp+timezone+extras (not including the 82 /// The end offset for the timestamp+timezone+extras (not including the
83 timestamp_end: usize, 84 timestamp_end: usize,
84 /// The end offset for the file list (not including the newline) 85 /// The end offset for the file list (not including the newline)
85 files_end: usize, 86 files_end: usize,
86 } 87 }
87 88
88 impl ChangelogRevisionData { 89 impl<'changelog> ChangelogRevisionData<'changelog> {
89 fn new(bytes: Vec<u8>) -> Result<Self, HgError> { 90 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
90 let mut line_iter = bytes.split(|b| b == &b'\n'); 91 let mut line_iter = bytes.split(|b| b == &b'\n');
91 let manifest_end = line_iter 92 let manifest_end = line_iter
92 .next() 93 .next()
93 .expect("Empty iterator from split()?") 94 .expect("Empty iterator from split()?")
94 .len(); 95 .len();
127 files_end, 128 files_end,
128 }) 129 })
129 } 130 }
130 131
131 fn null() -> Self { 132 fn null() -> Self {
132 Self::new( 133 Self::new(Cow::Borrowed(
133 b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(), 134 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
134 ) 135 ))
135 .unwrap() 136 .unwrap()
136 } 137 }
137 138
138 /// Return an iterator over the lines of the entry. 139 /// Return an iterator over the lines of the entry.
139 pub fn lines(&self) -> impl Iterator<Item = &[u8]> { 140 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
171 pub fn description(&self) -> &[u8] { 172 pub fn description(&self) -> &[u8] {
172 &self.bytes[self.files_end + 2..] 173 &self.bytes[self.files_end + 2..]
173 } 174 }
174 } 175 }
175 176
176 impl Debug for ChangelogRevisionData { 177 impl Debug for ChangelogRevisionData<'_> {
177 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 178 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
178 f.debug_struct("ChangelogRevisionData") 179 f.debug_struct("ChangelogRevisionData")
179 .field("bytes", &debug_bytes(&self.bytes)) 180 .field("bytes", &debug_bytes(&self.bytes))
180 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end])) 181 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
181 .field( 182 .field(
217 use pretty_assertions::assert_eq; 218 use pretty_assertions::assert_eq;
218 219
219 #[test] 220 #[test]
220 fn test_create_changelogrevisiondata_invalid() { 221 fn test_create_changelogrevisiondata_invalid() {
221 // Completely empty 222 // Completely empty
222 assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err()); 223 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
223 // No newline after manifest 224 // No newline after manifest
224 assert!(ChangelogRevisionData::new(b"abcd".to_vec()).is_err()); 225 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
225 // No newline after user 226 // No newline after user
226 assert!(ChangelogRevisionData::new(b"abcd\n".to_vec()).is_err()); 227 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
227 // No newline after timestamp 228 // No newline after timestamp
228 assert!(ChangelogRevisionData::new(b"abcd\n\n0 0".to_vec()).is_err()); 229 assert!(
230 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
231 );
229 // Missing newline after files 232 // Missing newline after files
230 assert!(ChangelogRevisionData::new( 233 assert!(ChangelogRevisionData::new(Cow::Borrowed(
231 b"abcd\n\n0 0\nfile1\nfile2".to_vec() 234 b"abcd\n\n0 0\nfile1\nfile2"
232 ) 235 ))
233 .is_err(),); 236 .is_err(),);
234 // Only one newline after files 237 // Only one newline after files
235 assert!(ChangelogRevisionData::new( 238 assert!(ChangelogRevisionData::new(Cow::Borrowed(
236 b"abcd\n\n0 0\nfile1\nfile2\n".to_vec() 239 b"abcd\n\n0 0\nfile1\nfile2\n"
237 ) 240 ))
238 .is_err(),); 241 .is_err(),);
239 } 242 }
240 243
241 #[test] 244 #[test]
242 fn test_create_changelogrevisiondata() { 245 fn test_create_changelogrevisiondata() {
243 let data = ChangelogRevisionData::new( 246 let data = ChangelogRevisionData::new(Cow::Borrowed(
244 b"0123456789abcdef0123456789abcdef01234567 247 b"0123456789abcdef0123456789abcdef01234567
245 Some One <someone@example.com> 248 Some One <someone@example.com>
246 0 0 249 0 0
247 file1 250 file1
248 file2 251 file2
249 252
250 some 253 some
251 commit 254 commit
252 message" 255 message",
253 .to_vec(), 256 ))
254 )
255 .unwrap(); 257 .unwrap();
256 assert_eq!( 258 assert_eq!(
257 data.manifest_node().unwrap(), 259 data.manifest_node().unwrap(),
258 Node::from_hex("0123456789abcdef0123456789abcdef01234567") 260 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
259 .unwrap() 261 .unwrap()