dirstate-v2: Use 32-bit integers instead of 64-bit for offsets
author Simon Sapin <simon.sapin@octobus.net>
Mon, 28 Jun 2021 15:41:50 +0200
changeset 47476 f23eafb036af
parent 47475 94e38822d395
child 47477 eb416759af7e
dirstate-v2: Use 32-bit integers instead of 64-bit for offsets. This saves 12 bytes per node (a node represents a file or a directory). These are offsets to other parts of the file. This would only be a limitation for a `.hg/dirstate` file larger than 4 GiB, which would only happen for a repository with tens of millions of files and directories. Differential Revision: https://phab.mercurial-scm.org/D10920
rust/hg-core/src/dirstate_tree/on_disk.rs
tests/test-hgignore.t
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Jun 24 21:54:14 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon Jun 28 15:41:50 2021 +0200
@@ -17,7 +17,7 @@
 use crate::DirstateError;
 use crate::DirstateParents;
 use crate::EntryState;
-use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
+use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
 use bytes_cast::BytesCast;
 use std::borrow::Cow;
 use std::convert::TryFrom;
@@ -135,9 +135,8 @@
 
 /// Counted in bytes from the start of the file
 ///
-/// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
-/// we could save space by using `U32Be` instead.
-type Offset = U64Be;
+/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
+type Offset = U32Be;
 
 /// Counted in number of items
 ///
@@ -172,8 +171,8 @@
 
 /// Make sure that size-affecting changes are made knowingly
 fn _static_assert_size_of() {
-    let _ = std::mem::transmute::<Header, [u8; 92]>;
-    let _ = std::mem::transmute::<Node, [u8; 57]>;
+    let _ = std::mem::transmute::<Header, [u8; 88]>;
+    let _ = std::mem::transmute::<Node, [u8; 45]>;
 }
 
 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
@@ -589,8 +588,8 @@
 where
     T: BytesCast,
 {
-    let start = u64::try_from(out.len())
-        // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
+    let start = u32::try_from(out.len())
+        // Could only panic for a dirstate file larger than 4 GiB
         .expect("dirstate-v2 offset overflow")
         .into();
     let len = u32::try_from(slice.len())
--- a/tests/test-hgignore.t	Thu Jun 24 21:54:14 2021 +0200
+++ b/tests/test-hgignore.t	Mon Jun 28 15:41:50 2021 +0200
@@ -406,19 +406,19 @@
 #if dirstate-v2
 
 Check the hash of ignore patterns written in the dirstate at offset
-12 + 20 + 20 + 8 + 4 + 4 + 4 = 72
+12 + 20 + 20 + 4 + 4 + 4 + 4 = 68
 
   $ hg status > /dev/null
   $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
   sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
-  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
+  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
   6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
 
   $ echo rel > .hg/testhgignorerel
   $ hg status > /dev/null
   $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
   sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
-  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode())
+  >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode())
   dea19cc7119213f24b6b582a4bae7b0cb063e34e
 
 #endif