view mercurial/revlogutils/constants.py @ 52292:085cc409847d

sslutil: bump the default minimum TLS version of the client to 1.2 (BC) TLS v1.0 and v1.1 are deprecated by RFC8996[1]: These versions lack support for current and recommended cryptographic algorithms and mechanisms, and various government and industry profiles of applications using TLS now mandate avoiding these old TLS versions. TLS version 1.2 became the recommended version for IETF protocols in 2008 (subsequently being obsoleted by TLS version 1.3 in 2018)... Various browsers have disabled or removed it[2][3][4], as have various internet services, and Windows 11 has it disabled by default[5]. We should move on too. (We should also bump it on the server side, as this config only affects clients not allowing a server to negotiate down. But the only server-side config is a `devel` option to pick exactly one protocol version and is commented as a footgun, so I'm hesitant to touch that. See 7dec5e441bf7 for details, which states that using `hg serve` directly isn't expected for a web service.) I'm not knowledgeable enough in this area to know if we should follow up with disabling certain ciphers too. But this should provide better security on its own. [1] https://datatracker.ietf.org/doc/rfc8996/ [2] https://learn.microsoft.com/en-us/DeployEdge/microsoft-edge-policies#sslversionmin [3] https://hacks.mozilla.org/2020/02/its-the-boot-for-tls-1-0-and-tls-1-1/ [4] https://security.googleblog.com/2018/10/modernizing-transport-security.html [5] https://techcommunity.microsoft.com/blog/windows-itpro-blog/tls-1-0-and-tls-1-1-soon-to-be-disabled-in-windows/3887947
author Matt Harbison <matt_harbison@yahoo.com>
date Mon, 11 Nov 2024 21:25:03 -0500
parents f4733654f144
children
line wrap: on
line source

# revlogdeltas.py - constant used for revlog logic.
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import annotations

import struct

from ..interfaces import repository
from .. import revlogutils

### Internal utily constants

KIND_CHANGELOG = 1001  # over 256 to not be comparable with a bytes
KIND_MANIFESTLOG = 1002
KIND_FILELOG = 1003
KIND_OTHER = 1004

ALL_KINDS = {
    KIND_CHANGELOG,
    KIND_MANIFESTLOG,
    KIND_FILELOG,
    KIND_OTHER,
}

### Index entry key
#
#
#    Internal details
#    ----------------
#
#    A large part of the revlog logic deals with revisions' "index entries", tuple
#    objects that contains the same "items" whatever the revlog version.
#    Different versions will have different ways of storing these items (sometimes
#    not having them at all), but the tuple will always be the same. New fields
#    are usually added at the end to avoid breaking existing code that relies
#    on the existing order. The field are defined as follows:

#    [0] offset:
#            The byte index of the start of revision data chunk.
#            That value is shifted up by 16 bits. use "offset = field >> 16" to
#            retrieve it.
#
#        flags:
#            A flag field that carries special information or changes the behavior
#            of the revision. (see `REVIDX_*` constants for details)
#            The flag field only occupies the first 16 bits of this field,
#            use "flags = field & 0xFFFF" to retrieve the value.
ENTRY_DATA_OFFSET = 0

#    [1] compressed length:
#            The size, in bytes, of the chunk on disk
ENTRY_DATA_COMPRESSED_LENGTH = 1

#    [2] uncompressed length:
#            The size, in bytes, of the full revision once reconstructed.
ENTRY_DATA_UNCOMPRESSED_LENGTH = 2

#    [3] base rev:
#            Either the base of the revision delta chain (without general
#            delta), or the base of the delta (stored in the data chunk)
#            with general delta.
ENTRY_DELTA_BASE = 3

#    [4] link rev:
#            Changelog revision number of the changeset introducing this
#            revision.
ENTRY_LINK_REV = 4

#    [5] parent 1 rev:
#            Revision number of the first parent
ENTRY_PARENT_1 = 5

#    [6] parent 2 rev:
#            Revision number of the second parent
ENTRY_PARENT_2 = 6

#    [7] node id:
#            The node id of the current revision
ENTRY_NODE_ID = 7

#    [8] sidedata offset:
#            The byte index of the start of the revision's side-data chunk.
ENTRY_SIDEDATA_OFFSET = 8

#    [9] sidedata chunk length:
#            The size, in bytes, of the revision's side-data chunk.
ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9

#    [10] data compression mode:
#            two bits that detail the way the data chunk is compressed on disk.
#            (see "COMP_MODE_*" constants for details). For revlog version 0 and
#            1 this will always be COMP_MODE_INLINE.
ENTRY_DATA_COMPRESSION_MODE = 10

#    [11] side-data compression mode:
#            two bits that detail the way the sidedata chunk is compressed on disk.
#            (see "COMP_MODE_*" constants for details)
ENTRY_SIDEDATA_COMPRESSION_MODE = 11

#    [12] Revision rank:
#            The number of revision under this one.
#
#            Formally this is defined as : rank(X) = len(ancestors(X) + X)
#
#            If rank == -1; then we do not have this information available.
#            Only `null` has a rank of 0.
ENTRY_RANK = 12

RANK_UNKNOWN = -1

### main revlog header

# We cannot rely on  Struct.format is inconsistent for python <=3.6 versus above
INDEX_HEADER_FMT = b">I"
INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
REVLOGV2 = 0xDEAD
# Dummy value until file format is finalized.
CHANGELOGV2 = 0xD34D

##  global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV0_FLAGS = 0
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA
CHANGELOGV2_FLAGS = 0

### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
assert INDEX_ENTRY_V1.size == 32 * 2

#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 bytes: compression mode (2 lower bit are data_compression_mode)
#  19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size

#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 bytes: compression mode (2 lower bit are data_compression_mode)
#  4 bytes: changeset rank (i.e. `len(::REV)`)
#  23 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiBi23x")
assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size
INDEX_ENTRY_V2_IDX_OFFSET = 0
INDEX_ENTRY_V2_IDX_COMPRESSED_LENGTH = 1
INDEX_ENTRY_V2_IDX_UNCOMPRESSED_LENGTH = 2
INDEX_ENTRY_V2_IDX_PARENT_1 = 3
INDEX_ENTRY_V2_IDX_PARENT_2 = 4
INDEX_ENTRY_V2_IDX_NODEID = 5
INDEX_ENTRY_V2_IDX_SIDEDATA_OFFSET = 6
INDEX_ENTRY_V2_IDX_SIDEDATA_COMPRESSED_LENGTH = 7
INDEX_ENTRY_V2_IDX_COMPRESSION_MODE = 8
INDEX_ENTRY_V2_IDX_RANK = 9

# revlog index flags

# For historical reasons, revlog's internal flags were exposed via the
# wire protocol and are even exposed in parts of the storage APIs.

# revision has censor metadata, must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# revision hash does not match data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# revision changes files in a way that could affect copy tracing.
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_HASCOPIESINFO,
]

# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

## chunk compression mode constants:
# These constants are used in revlog version >=2 to denote the compression used
# for a chunk.

# Chunk use no compression, the data stored on disk can be directly use as
# chunk value. Without any header information prefixed.
COMP_MODE_PLAIN = 0

# Chunk use the "default compression" for the revlog (usually defined in the
# revlog docket). A header is still used.
#
# XXX: keeping a header is probably not useful and we should probably drop it.
#
# XXX: The value of allow mixed type of compression in the revlog is unclear
#      and we should consider making PLAIN/DEFAULT the only available mode for
#      revlog v2, disallowing INLINE mode.
COMP_MODE_DEFAULT = 1

# Chunk use a compression mode stored "inline" at the start of the chunk
# itself.  This is the mode always used for revlog version "0" and "1"
COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE

SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
    CHANGELOGV2: CHANGELOGV2_FLAGS,
}

_no = lambda flags: False
_yes = lambda flags: True


def _from_flag(flag):
    return lambda flags: bool(flags & flag)


FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV1: {
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV2: {
        # The point of inline-revlog is to reduce the number of files used in
        # the store. Using a docket defeat this purpose. So we needs other
        # means to reduce the number of files for revlogv2.
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
        b'docket': True,
    },
    CHANGELOGV2: {
        b'inline': _no,
        # General delta is useless for changelog since we don't do any delta
        b'generaldelta': _no,
        b'sidedata': True,
        b'docket': True,
    },
}


SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000

### What should be done with a cached delta and its base ?

# Ignore the cache when considering candidates.
#
# The cached delta might be used, but the delta base will not be scheduled for
# usage earlier than in "normal" order.
DELTA_BASE_REUSE_NO = 0

# Prioritize trying the cached delta base
#
# The delta base will be tested for validy first. So that the cached deltas get
# used when possible.
DELTA_BASE_REUSE_TRY = 1
DELTA_BASE_REUSE_FORCE = 2