changeset 51392:6603a1448f18

hg-core: separate timestamp and extra methods
author Arun Kulshreshtha <akulshreshtha@janestreet.com>
date Tue, 20 Feb 2024 10:47:47 -0500
parents a96ed440450e
children 3a7ef1398385
files rust/hg-core/src/revlog/changelog.rs
diffstat 1 files changed, 86 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/changelog.rs	Thu Feb 15 11:39:18 2024 -0500
+++ b/rust/hg-core/src/revlog/changelog.rs	Tue Feb 20 10:47:47 2024 -0500
@@ -233,9 +233,14 @@
         &self.bytes[self.user_end + 1..self.timestamp_end]
     }
 
-    /// Parsed timestamp line, including optional extras.
-    pub fn parsed_timestamp(&self) -> Result<TimestampAndExtra, HgError> {
-        TimestampAndExtra::from_bytes(self.timestamp_line())
+    /// Parsed timestamp.
+    pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
+        parse_timestamp(self.timestamp_line())
+    }
+
+    /// Optional commit extras.
+    pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+        parse_timestamp_line_extra(self.timestamp_line())
     }
 
     /// The files changed in this revision.
@@ -295,83 +300,66 @@
     .to_string()
 }
 
-/// Parsed timestamp line, including the timestamp and optional extras.
-#[derive(Clone, Debug)]
-pub struct TimestampAndExtra {
-    pub timestamp: DateTime<FixedOffset>,
-    pub extra: BTreeMap<String, Vec<u8>>,
-}
+/// Parse the raw bytes of the timestamp line from a changelog entry.
+///
+/// According to the documentation in `hg help dates` and the
+/// implementation in `changelog.py`, the format of the timestamp line
+/// is `time tz extra\n` where:
+///
+/// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
+///   as seconds since the UNIX epoch.
+///
+/// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
+///   seconds WEST of UTC (so negative for timezones east of UTC, which is the
+///   opposite of the sign in ISO 8601 timestamps).
+///
+/// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
+///   and value in each pair separated by an ASCII colon. Keys are limited to
+///   ASCII letters, digits, hyphens, and underscores, whereas values can be
+///   arbitrary bytes.
+fn parse_timestamp(
+    timestamp_line: &[u8],
+) -> Result<DateTime<FixedOffset>, HgError> {
+    let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
 
-impl TimestampAndExtra {
-    /// Parse the raw bytes of the timestamp line from a changelog entry.
-    ///
-    /// According to the documentation in `hg help dates` and the
-    /// implementation in `changelog.py`, the format of the timestamp line
-    /// is `time tz extra\n` where:
-    ///
-    /// - `time` is an ASCII-encoded signed int or float denoting a UTC
-    ///   timestamp as seconds since the UNIX epoch.
-    ///
-    /// - `tz` is the timezone offset as an ASCII-encoded signed integer
-    ///   denoting seconds WEST of UTC (so negative for timezones east of UTC,
-    ///   which is the opposite of the sign in ISO 8601 timestamps).
-    ///
-    /// - `extra` is an optional set of NUL-delimited key-value pairs, with the
-    ///   key and value in each pair separated by an ASCII colon. Keys are
-    ///   limited to ASCII letters, digits, hyphens, and underscores, whereas
-    ///   values can be arbitrary bytes.
-    fn from_bytes(line: &[u8]) -> Result<Self, HgError> {
-        let mut parts = line.splitn(3, |c| *c == b' ');
-
-        let timestamp_bytes = parts
-            .next()
-            .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
-        let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
-            HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
-        })?;
-        let timestamp_utc = timestamp_str
-            .parse()
-            .map_err(|e| {
-                HgError::corrupted(format!("failed to parse timestamp: {e}"))
+    let timestamp_bytes = parts
+        .next()
+        .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
+    let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
+        HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
+    })?;
+    let timestamp_utc = timestamp_str
+        .parse()
+        .map_err(|e| {
+            HgError::corrupted(format!("failed to parse timestamp: {e}"))
+        })
+        .and_then(|secs| {
+            NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
+                HgError::corrupted(format!(
+                    "integer timestamp out of valid range: {secs}"
+                ))
             })
-            .and_then(|secs| {
-                NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
-                    HgError::corrupted(format!(
-                        "integer timestamp out of valid range: {secs}"
-                    ))
-                })
-            })
-            // Attempt to parse the timestamp as a float if we can't parse
-            // it as an int. It doesn't seem like float timestamps are actually
-            // used in practice, but the Python code supports them.
-            .or_else(|_| parse_float_timestamp(timestamp_str))?;
+        })
+        // Attempt to parse the timestamp as a float if we can't parse
+        // it as an int. It doesn't seem like float timestamps are actually
+        // used in practice, but the Python code supports them.
+        .or_else(|_| parse_float_timestamp(timestamp_str))?;
 
-        let timezone_bytes = parts
-            .next()
-            .ok_or_else(|| HgError::corrupted("missing timezone"))?;
-        let timezone_secs: i32 = str::from_utf8(timezone_bytes)
-            .map_err(|e| {
-                HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
-            })?
-            .parse()
-            .map_err(|e| {
-                HgError::corrupted(format!("timezone is not an integer: {e}"))
-            })?;
-        let timezone =
-            FixedOffset::west_opt(timezone_secs).ok_or_else(|| {
-                HgError::corrupted("timezone offset out of bounds")
-            })?;
+    let timezone_bytes = parts
+        .next()
+        .ok_or_else(|| HgError::corrupted("missing timezone"))?;
+    let timezone_secs: i32 = str::from_utf8(timezone_bytes)
+        .map_err(|e| {
+            HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
+        })?
+        .parse()
+        .map_err(|e| {
+            HgError::corrupted(format!("timezone is not an integer: {e}"))
+        })?;
+    let timezone = FixedOffset::west_opt(timezone_secs)
+        .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
 
-        let timestamp =
-            DateTime::from_naive_utc_and_offset(timestamp_utc, timezone);
-        let extra = parts
-            .next()
-            .map(parse_extra)
-            .transpose()?
-            .unwrap_or_default();
-
-        Ok(Self { timestamp, extra })
-    }
+    Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
 }
 
 /// Attempt to parse the given string as floating-point timestamp, and
@@ -413,12 +401,12 @@
     })
 }
 
-/// Parse the "extra" fields from a changeset's timestamp line.
+/// Decode changeset extra fields.
 ///
 /// Extras are null-delimited key-value pairs where the key consists of ASCII
 /// alphanumeric characters plus hyphens and underscores, and the value can
 /// contain arbitrary bytes.
-fn parse_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
     extra
         .split(|c| *c == b'\0')
         .map(|pair| {
@@ -456,6 +444,18 @@
         .collect()
 }
 
+/// Parse the extra fields from a changeset's timestamp line.
+fn parse_timestamp_line_extra(
+    timestamp_line: &[u8],
+) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
+    Ok(timestamp_line
+        .splitn(3, |c| *c == b' ')
+        .nth(2)
+        .map(decode_extra)
+        .transpose()?
+        .unwrap_or_default())
+}
+
 /// Decode Mercurial's escaping for changelog extras.
 ///
 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
@@ -681,7 +681,7 @@
     }
 
     #[test]
-    fn test_parse_extra() {
+    fn test_decode_extra() {
         let extra = [
             ("branch".into(), b"default".to_vec()),
             ("key-with-hyphens".into(), b"value1".to_vec()),
@@ -693,9 +693,9 @@
         .collect::<BTreeMap<String, Vec<u8>>>();
 
         let encoded = encode_extra(&extra);
-        let parsed = parse_extra(&encoded).unwrap();
+        let decoded = decode_extra(&encoded).unwrap();
 
-        assert_eq!(extra, parsed);
+        assert_eq!(extra, decoded);
     }
 
     #[test]
@@ -713,7 +713,7 @@
 
         for (extra, msg) in test_cases {
             assert!(
-                parse_extra(&extra).is_err(),
+                decode_extra(&extra).is_err(),
                 "corrupt extra should have failed to parse: {}",
                 msg
             );
@@ -735,12 +735,10 @@
         let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
         line.extend_from_slice(&encode_extra(&extra));
 
-        let parsed = TimestampAndExtra::from_bytes(&line).unwrap();
+        let timestamp = parse_timestamp(&line).unwrap();
+        assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
 
-        assert_eq!(
-            &parsed.timestamp.to_rfc3339(),
-            "2005-05-03T13:16:10-08:00"
-        );
-        assert_eq!(extra, parsed.extra);
+        let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
+        assert_eq!(extra, parsed_extra);
     }
 }