dirstate-v2: Enforce data size read from the docket file
The data file may not be shorter than its size given by the docket.
It may be longer, but additional data is ignored.
Differential Revision: https://phab.mercurial-scm.org/D11089
--- a/mercurial/dirstatemap.py Thu Jul 08 12:18:21 2021 +0200
+++ b/mercurial/dirstatemap.py Mon Jul 12 22:46:52 2021 +0200
@@ -637,7 +637,9 @@
data = self._opener.read(self.docket.data_filename())
else:
data = b''
- self._rustmap = rustmod.DirstateMap.new_v2(data)
+ self._rustmap = rustmod.DirstateMap.new_v2(
+ data, self.docket.data_size
+ )
parents = self.docket.parents
else:
self._rustmap, parents = rustmod.DirstateMap.new_v1(
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Jul 08 12:18:21 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Jul 12 22:46:52 2021 +0200
@@ -410,8 +410,15 @@
}
#[timed]
- pub fn new_v2(on_disk: &'on_disk [u8]) -> Result<Self, DirstateError> {
- Ok(on_disk::read(on_disk)?)
+ pub fn new_v2(
+ on_disk: &'on_disk [u8],
+ data_size: usize,
+ ) -> Result<Self, DirstateError> {
+ if let Some(data) = on_disk.get(..data_size) {
+ Ok(on_disk::read(data)?)
+ } else {
+ Err(DirstateV2ParseError.into())
+ }
}
#[timed]
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Jul 08 12:18:21 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Jul 12 22:46:52 2021 +0200
@@ -21,7 +21,7 @@
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use std::borrow::Cow;
-use std::convert::TryFrom;
+use std::convert::{TryFrom, TryInto};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
@@ -224,6 +224,11 @@
DirstateParents { p1, p2 }
}
+ pub fn data_size(&self) -> usize {
+ // This `unwrap` could only panic on a 16-bit CPU
+ self.header.data_size.get().try_into().unwrap()
+ }
+
pub fn data_filename(&self) -> String {
String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
}
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Jul 08 12:18:21 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Mon Jul 12 22:46:52 2021 +0200
@@ -83,11 +83,12 @@
@staticmethod
def new_v2(
on_disk: PyBytes,
+ data_size: usize,
) -> PyResult<PyObject> {
let dirstate_error = |e: DirstateError| {
PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
};
- let inner = OwningDirstateMap::new_v2(py, on_disk)
+ let inner = OwningDirstateMap::new_v2(py, on_disk, data_size)
.map_err(dirstate_error)?;
let map = Self::create_instance(py, Box::new(inner))?;
Ok(map.into_object())
--- a/rust/hg-cpython/src/dirstate/owning.rs Thu Jul 08 12:18:21 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/owning.rs Mon Jul 12 22:46:52 2021 +0200
@@ -48,9 +48,10 @@
pub fn new_v2(
py: Python,
on_disk: PyBytes,
+ data_size: usize,
) -> Result<Self, DirstateError> {
let bytes: &'_ [u8] = on_disk.data(py);
- let map = DirstateMap::new_v2(bytes)?;
+ let map = DirstateMap::new_v2(bytes, data_size)?;
// Like in `bytes` above, this `'_` lifetime parameter borrows from
// the bytes buffer owned by `on_disk`.
--- a/rust/rhg/src/commands/status.rs Thu Jul 08 12:18:21 2021 +0200
+++ b/rust/rhg/src/commands/status.rs Mon Jul 12 22:46:52 2021 +0200
@@ -170,11 +170,13 @@
let (mut dmap, parents) = if repo.has_dirstate_v2() {
let parents;
let dirstate_data;
+ let data_size;
if let Some(docket_data) =
repo.hg_vfs().read("dirstate").io_not_found_as_none()?
{
let docket = on_disk::read_docket(&docket_data)?;
parents = Some(docket.parents());
+ data_size = docket.data_size();
dirstate_data_mmap = repo
.hg_vfs()
.mmap_open(docket.data_filename())
@@ -182,9 +184,10 @@
dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
} else {
parents = None;
+ data_size = 0;
dirstate_data = b"";
}
- let dmap = DirstateMap::new_v2(dirstate_data)?;
+ let dmap = DirstateMap::new_v2(dirstate_data, data_size)?;
(dmap, parents)
} else {
dirstate_data_mmap =