mercurial/dirstateutils/docket.py
author Raphaël Gomès <rgomes@octobus.net>
Tue, 01 Oct 2024 13:49:11 +0200
changeset 52060 8b7123c8947b
parent 51863 f4733654f144
permissions -rw-r--r--
update: add a Rust fast-path when updating from null (and clean) This case is easy to detect and we have all we need to generate a valid working copy and dirstate entirely in Rust, which speeds things up considerably: On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s, all while consuming 50% less system time, with all caches hot. Something to note is that further improvements will probably happen with the upcoming `InnerRevlog` series that does smarter mmap handling, especially for filelogs. Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled) ``` ### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # benchmark.variants.worker = default default: 5.328762 ~~~~~ rust: 1.308654 (-75.44%, -4.02) ### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # benchmark.variants.worker = default default: 1.693271 ~~~~~ rust: 1.151053 (-32.02%, -0.54) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # benchmark.variants.worker = default default: 38.901613 ~~~~~ rust: 11.637880 (-70.08%, -27.26) ### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # 
benchmark.variants.worker = default default: 4.793727 ~~~~~ rust: 1.505905 (-68.59%, -3.29) ```

# dirstatedocket.py - docket file for dirstate-v2
#
# Copyright Mercurial Contributors
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import annotations

import struct

from ..revlogutils import docket as docket_mod
from . import v2

# Byte string that opens every dirstate-v2 docket; it is the first header field.
V2_FORMAT_MARKER = b"dirstate-v2\n"

# On-disk layout of the docket, in order:
#
# * 12 bytes: format marker
# * 32 bytes: node ID of the working directory's first parent
# * 32 bytes: node ID of the working directory's second parent
# * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
# * 4 bytes: big-endian used size of the data file
# * 1 byte: length of the data file's UUID
# * variable: data file's UUID (follows the fixed-size part, not in HEADER)
#
# Node IDs are null-padded if shorter than 32 bytes.
# A data file shorter than the specified used size is corrupted (truncated)
#
# HEADER only describes the fixed-size part, i.e. everything up to and
# including the UUID length byte.
HEADER = struct.Struct(
    f">{len(V2_FORMAT_MARKER)}s32s32s{v2.TREE_METADATA_SIZE}sLB"
)


class DirstateDocket:
    """In-memory representation of a dirstate-v2 docket.

    Attributes set by the constructor:

    * ``parents``: pair of node IDs ``(p1, p2)``
    * ``data_size``: value stored in the header's used-size field
    * ``tree_metadata``: bytes stored in the header, not interpreted here
    * ``uuid``: identifier substituted into ``data_filename_pattern``;
      ``None`` for a docket parsed from empty input
    """

    data_filename_pattern = b'dirstate.%s'

    def __init__(self, parents, data_size, tree_metadata, uuid):
        """Store the four docket fields as given, without validation."""
        self.parents = parents
        self.data_size = data_size
        self.tree_metadata = tree_metadata
        self.uuid = uuid

    @classmethod
    def with_new_uuid(cls, parents, data_size, tree_metadata):
        """Create a docket whose UUID comes from ``docket_mod.make_uid()``."""
        fresh_uid = docket_mod.make_uid()
        return cls(parents, data_size, tree_metadata, fresh_uid)

    @classmethod
    def parse(cls, data, nodeconstants):
        """Build a docket from the raw bytes of a docket file.

        Empty ``data`` yields a docket with both parents set to
        ``nodeconstants.nullid``, a zero data size, empty tree metadata and
        no UUID.  Otherwise the fixed-size header is decoded with ``HEADER``
        and the UUID is read from the bytes that follow it.

        Raises ``ValueError`` when the header does not start with
        ``V2_FORMAT_MARKER``.
        """
        if data:
            fields = HEADER.unpack_from(data)
            marker, raw_p1, raw_p2, tree_meta, used_size, uid_len = fields
            if marker != V2_FORMAT_MARKER:
                raise ValueError("expected dirstate-v2 marker")
            uid_start = HEADER.size
            uid = data[uid_start : uid_start + uid_len]
            # The header always stores 32 bytes per parent; keep only the
            # leading ``nodelen`` bytes of each.
            node_len = nodeconstants.nodelen
            both_parents = (raw_p1[:node_len], raw_p2[:node_len])
            return cls(both_parents, used_size, tree_meta, uid)
        null = nodeconstants.nullid
        return cls((null, null), 0, b'', None)

    def serialize(self):
        """Return the docket as bytes: packed ``HEADER`` followed by the UUID."""
        first_parent, second_parent = self.parents
        packed = HEADER.pack(
            V2_FORMAT_MARKER,
            first_parent,
            second_parent,
            self.tree_metadata,
            self.data_size,
            len(self.uuid),
        )
        return packed + self.uuid

    def data_filename(self):
        """Return ``data_filename_pattern`` with the UUID substituted in."""
        pattern = self.data_filename_pattern
        return pattern % self.uuid