update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap handling, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
# dirstatedocket.py - docket file for dirstate-v2
#
# Copyright Mercurial Contributors
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import annotations
import struct
from ..revlogutils import docket as docket_mod
from . import v2
# Marker expected at the very start of a non-empty dirstate-v2 docket.
V2_FORMAT_MARKER = b"dirstate-v2\n"

# Layout of the docket file, in order:
#
# * 12 bytes: format marker
# * 32 bytes: node ID of the working directory's first parent
# * 32 bytes: node ID of the working directory's second parent
# * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
# * 4 bytes: big-endian used size of the data file
# * 1 byte: length of the data file's UUID
# * variable: data file's UUID
#
# Node IDs are null-padded if shorter than 32 bytes.
# A data file shorter than the specified used size is corrupted (truncated)
#
# HEADER describes every field above except the trailing variable-length
# UUID. In the struct format string, ">" selects big-endian with standard
# sizes, "<N>s" is an N-byte bytes field, "L" is a 4-byte unsigned integer
# and "B" is a 1-byte unsigned integer.
HEADER = struct.Struct(
    ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), v2.TREE_METADATA_SIZE)
)
class DirstateDocket:
    """In-memory representation of a dirstate-v2 docket.

    Attributes:
        parents: pair ``(p1, p2)`` of working-directory parent node IDs.
        data_size: used size of the data file, as recorded in the header.
        tree_metadata: raw tree metadata bytes; not interpreted here.
        uuid: bytes identifying the data file, or ``None`` for the docket
            produced by parsing empty input.
    """

    # Template for the data file name; ``%s`` receives the docket's UUID.
    data_filename_pattern = b'dirstate.%s'

    def __init__(self, parents, data_size, tree_metadata, uuid):
        """Store the four docket fields exactly as given."""
        self.uuid = uuid
        self.tree_metadata = tree_metadata
        self.data_size = data_size
        self.parents = parents

    @classmethod
    def with_new_uuid(cls, parents, data_size, tree_metadata):
        """Build a docket whose UUID comes from ``docket_mod.make_uid()``."""
        fresh_uid = docket_mod.make_uid()
        return cls(parents, data_size, tree_metadata, fresh_uid)

    @classmethod
    def parse(cls, data, nodeconstants):
        """Decode docket bytes into a ``DirstateDocket``.

        Falsy ``data`` yields a docket with both parents set to
        ``nodeconstants.nullid``, a size of 0, empty tree metadata and a
        ``None`` UUID.

        Otherwise the fixed header is unpacked from the start of ``data``,
        the UUID is sliced from the bytes that follow it, and both parents
        are cut down to ``nodeconstants.nodelen`` bytes.

        Raises:
            ValueError: if the data does not start with the v2 marker.
        """
        if not data:
            null = nodeconstants.nullid
            return cls((null, null), 0, b'', None)

        fields = HEADER.unpack_from(data)
        marker, raw_p1, raw_p2, tree_metadata, used_size, uuid_len = fields
        if marker != V2_FORMAT_MARKER:
            raise ValueError("expected dirstate-v2 marker")

        # The UUID sits immediately after the fixed-size header.
        uuid_start = HEADER.size
        uuid = data[uuid_start : uuid_start + uuid_len]

        # Header slots are 32 bytes wide; keep only the real node length.
        node_len = nodeconstants.nodelen
        parents = (raw_p1[:node_len], raw_p2[:node_len])
        return cls(parents, used_size, tree_metadata, uuid)

    def serialize(self):
        """Return the docket as bytes: packed header followed by the UUID.

        ``self.uuid`` must support ``len()`` and concatenation with bytes,
        so this cannot be used on a docket whose UUID is ``None``.
        """
        first_parent, second_parent = self.parents
        fields = (
            V2_FORMAT_MARKER,
            first_parent,
            second_parent,
            self.tree_metadata,
            self.data_size,
            len(self.uuid),
        )
        return HEADER.pack(*fields) + self.uuid

    def data_filename(self):
        """Return the data file name built from the pattern and the UUID."""
        pattern = self.data_filename_pattern
        return pattern % self.uuid