dirstate-v2: Use 32-bit integers instead of 64-bit for offsets
This saves 12 bytes per node (a node represents a file or a directory), and 4 bytes in the header.
These are offsets to other parts of the file. The 32-bit limit would only matter
for a `.hg/dirstate` file larger than 4 GiB, which would only happen for a
repository with tens of millions of files and directories.
Differential Revision: https://phab.mercurial-scm.org/D10920
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Jun 24 21:54:14 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Jun 28 15:41:50 2021 +0200
@@ -17,7 +17,7 @@
use crate::DirstateError;
use crate::DirstateParents;
use crate::EntryState;
-use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
+use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
use bytes_cast::BytesCast;
use std::borrow::Cow;
use std::convert::TryFrom;
@@ -135,9 +135,8 @@
/// Counted in bytes from the start of the file
///
-/// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
-/// we could save space by using `U32Be` instead.
-type Offset = U64Be;
+/// NOTE: `.hg/dirstate` files larger than 4 GiB are not supported.
+type Offset = U32Be;
/// Counted in number of items
///
@@ -172,8 +171,8 @@
/// Make sure that size-affecting changes are made knowingly
fn _static_assert_size_of() {
- let _ = std::mem::transmute::<Header, [u8; 92]>;
- let _ = std::mem::transmute::<Node, [u8; 57]>;
+ let _ = std::mem::transmute::<Header, [u8; 88]>;
+ let _ = std::mem::transmute::<Node, [u8; 45]>;
}
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
@@ -589,8 +588,8 @@
where
T: BytesCast,
{
- let start = u64::try_from(out.len())
- // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
+ let start = u32::try_from(out.len())
+ // Could only panic for a dirstate file larger than 4 GiB
.expect("dirstate-v2 offset overflow")
.into();
let len = u32::try_from(slice.len())
--- a/tests/test-hgignore.t Thu Jun 24 21:54:14 2021 +0200
+++ b/tests/test-hgignore.t Mon Jun 28 15:41:50 2021 +0200
@@ -406,19 +406,19 @@
#if dirstate-v2
Check the hash of ignore patterns written in the dirstate at offset
-12 + 20 + 20 + 8 + 4 + 4 + 4 = 72
+12 + 20 + 20 + 4 + 4 + 4 + 4 = 68
$ hg status > /dev/null
$ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
- >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
+ >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
$ echo rel > .hg/testhgignorerel
$ hg status > /dev/null
$ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
- >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
+ >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
dea19cc7119213f24b6b582a4bae7b0cb063e34e
#endif