# HG changeset patch # User Simon Sapin # Date 1624887710 -7200 # Node ID f23eafb036af68ce4416bac3c1705f4be78c39e0 # Parent 94e38822d39543ef6bdbfa3644ae3c04dcbda364 dirstate-v2: Use 32-bit integers instead of 64-bit for offsets This saves 12 bytes per node. (Nodes representing files or directories.) These are offsets to other parts of the file. This would only be a limitation for a `.hg/dirstate` file larger than 4 GiB, which would only happen for a repository with dozens of millions of files and directories. Differential Revision: https://phab.mercurial-scm.org/D10920 diff -r 94e38822d395 -r f23eafb036af rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Jun 24 21:54:14 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Jun 28 15:41:50 2021 +0200 @@ -17,7 +17,7 @@ use crate::DirstateError; use crate::DirstateParents; use crate::EntryState; -use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be}; +use bytes_cast::unaligned::{I32Be, I64Be, U32Be}; use bytes_cast::BytesCast; use std::borrow::Cow; use std::convert::TryFrom; @@ -135,9 +135,8 @@ /// Counted in bytes from the start of the file /// -/// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB -/// we could save space by using `U32Be` instead. -type Offset = U64Be; +/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. +type Offset = U32Be; /// Counted in number of items /// @@ -172,8 +171,8 @@ /// Make sure that size-affecting changes are made knowingly fn _static_assert_size_of() { - let _ = std::mem::transmute::; - let _ = std::mem::transmute::; + let _ = std::mem::transmute::; + let _ = std::mem::transmute::; } /// Unexpected file format found in `.hg/dirstate` with the "v2" format. @@ -589,8 +588,8 @@ where T: BytesCast, { - let start = u64::try_from(out.len()) - // Could only panic on a 128-bit CPU with a dirstate over 16 EiB + let start = u32::try_from(out.len()) + // Could only panic for a dirstate file larger than 4 GiB .expect("dirstate-v2 offset overflow") .into(); let len = u32::try_from(slice.len()) diff -r 94e38822d395 -r f23eafb036af tests/test-hgignore.t --- a/tests/test-hgignore.t Thu Jun 24 21:54:14 2021 +0200 +++ b/tests/test-hgignore.t Mon Jun 28 15:41:50 2021 +0200 @@ -406,19 +406,19 @@ #if dirstate-v2 Check the hash of ignore patterns written in the dirstate at offset -12 + 20 + 20 + 8 + 4 + 4 + 4 = 72 +12 + 20 + 20 + 4 + 4 + 4 + 4 = 68 $ hg status > /dev/null $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff - >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode()) + >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode()) 6e315b60f15fb5dfa02be00f3e2c8f923051f5ff $ echo rel > .hg/testhgignorerel $ hg status > /dev/null $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1 sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e - >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[72:][:20]).decode()) + >>> import binascii; print(binascii.hexlify(open(".hg/dirstate", "rb").read()[68:][:20]).decode()) dea19cc7119213f24b6b582a4bae7b0cb063e34e #endif