Mercurial > hg
view mercurial/revlogutils/constants.py @ 52178:bcd4962e0df9
rust-revlog: don't create an in-memory nodemap for filelogs from Python
Explanations inline.
Benchmarks from this change affect positively the only repo that showed this
being a problem:
```
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
default: 62.848869 ~~~~~
before-this-patch: 58.113051 (-7.54%, -4.74)
this-patch: 57.407533 (-8.66%, -5.44)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
default: 3.173532 ~~~~~
before-this-patch: 3.543591 (+11.66%, +0.37)
this-patch: 3.297235 (+3.90%, +0.12)
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 31 Jul 2024 13:35:54 +0200 |
parents | f4733654f144 |
children |
line wrap: on
line source
# revlogdeltas.py - constant used for revlog logic. # # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> # Copyright 2018 Octobus <contact@octobus.net> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. """Helper class to compute deltas stored inside revlogs""" from __future__ import annotations import struct from ..interfaces import repository from .. import revlogutils ### Internal utily constants KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes KIND_MANIFESTLOG = 1002 KIND_FILELOG = 1003 KIND_OTHER = 1004 ALL_KINDS = { KIND_CHANGELOG, KIND_MANIFESTLOG, KIND_FILELOG, KIND_OTHER, } ### Index entry key # # # Internal details # ---------------- # # A large part of the revlog logic deals with revisions' "index entries", tuple # objects that contains the same "items" whatever the revlog version. # Different versions will have different ways of storing these items (sometimes # not having them at all), but the tuple will always be the same. New fields # are usually added at the end to avoid breaking existing code that relies # on the existing order. The field are defined as follows: # [0] offset: # The byte index of the start of revision data chunk. # That value is shifted up by 16 bits. use "offset = field >> 16" to # retrieve it. # # flags: # A flag field that carries special information or changes the behavior # of the revision. (see `REVIDX_*` constants for details) # The flag field only occupies the first 16 bits of this field, # use "flags = field & 0xFFFF" to retrieve the value. ENTRY_DATA_OFFSET = 0 # [1] compressed length: # The size, in bytes, of the chunk on disk ENTRY_DATA_COMPRESSED_LENGTH = 1 # [2] uncompressed length: # The size, in bytes, of the full revision once reconstructed. ENTRY_DATA_UNCOMPRESSED_LENGTH = 2 # [3] base rev: # Either the base of the revision delta chain (without general # delta), or the base of the delta (stored in the data chunk) # with general delta. ENTRY_DELTA_BASE = 3 # [4] link rev: # Changelog revision number of the changeset introducing this # revision. ENTRY_LINK_REV = 4 # [5] parent 1 rev: # Revision number of the first parent ENTRY_PARENT_1 = 5 # [6] parent 2 rev: # Revision number of the second parent ENTRY_PARENT_2 = 6 # [7] node id: # The node id of the current revision ENTRY_NODE_ID = 7 # [8] sidedata offset: # The byte index of the start of the revision's side-data chunk. ENTRY_SIDEDATA_OFFSET = 8 # [9] sidedata chunk length: # The size, in bytes, of the revision's side-data chunk. ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9 # [10] data compression mode: # two bits that detail the way the data chunk is compressed on disk. # (see "COMP_MODE_*" constants for details). For revlog version 0 and # 1 this will always be COMP_MODE_INLINE. ENTRY_DATA_COMPRESSION_MODE = 10 # [11] side-data compression mode: # two bits that detail the way the sidedata chunk is compressed on disk. # (see "COMP_MODE_*" constants for details) ENTRY_SIDEDATA_COMPRESSION_MODE = 11 # [12] Revision rank: # The number of revision under this one. # # Formally this is defined as : rank(X) = len(ancestors(X) + X) # # If rank == -1; then we do not have this information available. # Only `null` has a rank of 0. ENTRY_RANK = 12 RANK_UNKNOWN = -1 ### main revlog header # We cannot rely on Struct.format is inconsistent for python <=3.6 versus above INDEX_HEADER_FMT = b">I" INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT) ## revlog version REVLOGV0 = 0 REVLOGV1 = 1 # Dummy value until file format is finalized. REVLOGV2 = 0xDEAD # Dummy value until file format is finalized. CHANGELOGV2 = 0xD34D ## global revlog header flags # Shared across v1 and v2. FLAG_INLINE_DATA = 1 << 16 # Only used by v1, implied by v2. FLAG_GENERALDELTA = 1 << 17 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA REVLOG_DEFAULT_FORMAT = REVLOGV1 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS REVLOGV0_FLAGS = 0 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA REVLOGV2_FLAGS = FLAG_INLINE_DATA CHANGELOGV2_FLAGS = 0 ### individual entry ## index v0: # 4 bytes: offset # 4 bytes: compressed length # 4 bytes: base rev # 4 bytes: link rev # 20 bytes: parent 1 nodeid # 20 bytes: parent 2 nodeid # 20 bytes: nodeid INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s") ## index v1 # 6 bytes: offset # 2 bytes: flags # 4 bytes: compressed length # 4 bytes: uncompressed length # 4 bytes: base rev # 4 bytes: link rev # 4 bytes: parent 1 rev # 4 bytes: parent 2 rev # 32 bytes: nodeid INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x") assert INDEX_ENTRY_V1.size == 32 * 2 # 6 bytes: offset # 2 bytes: flags # 4 bytes: compressed length # 4 bytes: uncompressed length # 4 bytes: base rev # 4 bytes: link rev # 4 bytes: parent 1 rev # 4 bytes: parent 2 rev # 32 bytes: nodeid # 8 bytes: sidedata offset # 4 bytes: sidedata compressed length # 1 bytes: compression mode (2 lower bit are data_compression_mode) # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page) INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x") assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size # 6 bytes: offset # 2 bytes: flags # 4 bytes: compressed length # 4 bytes: uncompressed length # 4 bytes: parent 1 rev # 4 bytes: parent 2 rev # 32 bytes: nodeid # 8 bytes: sidedata offset # 4 bytes: sidedata compressed length # 1 bytes: compression mode (2 lower bit are data_compression_mode) # 4 bytes: changeset rank (i.e. `len(::REV)`) # 23 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page) INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiBi23x") assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size INDEX_ENTRY_V2_IDX_OFFSET = 0 INDEX_ENTRY_V2_IDX_COMPRESSED_LENGTH = 1 INDEX_ENTRY_V2_IDX_UNCOMPRESSED_LENGTH = 2 INDEX_ENTRY_V2_IDX_PARENT_1 = 3 INDEX_ENTRY_V2_IDX_PARENT_2 = 4 INDEX_ENTRY_V2_IDX_NODEID = 5 INDEX_ENTRY_V2_IDX_SIDEDATA_OFFSET = 6 INDEX_ENTRY_V2_IDX_SIDEDATA_COMPRESSED_LENGTH = 7 INDEX_ENTRY_V2_IDX_COMPRESSION_MODE = 8 INDEX_ENTRY_V2_IDX_RANK = 9 # revlog index flags # For historical reasons, revlog's internal flags were exposed via the # wire protocol and are even exposed in parts of the storage APIs. # revision has censor metadata, must be verified REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED # revision hash does not match data (narrowhg) REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS # revision data is stored externally REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED # revision changes files in a way that could affect copy tracing. REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO REVIDX_DEFAULT_FLAGS = 0 # stable order in which flags need to be processed and their processors applied REVIDX_FLAGS_ORDER = [ REVIDX_ISCENSORED, REVIDX_ELLIPSIS, REVIDX_EXTSTORED, REVIDX_HASCOPIESINFO, ] # bitmark for flags that could cause rawdata content change REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED ## chunk compression mode constants: # These constants are used in revlog version >=2 to denote the compression used # for a chunk. # Chunk use no compression, the data stored on disk can be directly use as # chunk value. Without any header information prefixed. COMP_MODE_PLAIN = 0 # Chunk use the "default compression" for the revlog (usually defined in the # revlog docket). A header is still used. # # XXX: keeping a header is probably not useful and we should probably drop it. # # XXX: The value of allow mixed type of compression in the revlog is unclear # and we should consider making PLAIN/DEFAULT the only available mode for # revlog v2, disallowing INLINE mode. COMP_MODE_DEFAULT = 1 # Chunk use a compression mode stored "inline" at the start of the chunk # itself. This is the mode always used for revlog version "0" and "1" COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE SUPPORTED_FLAGS = { REVLOGV0: REVLOGV0_FLAGS, REVLOGV1: REVLOGV1_FLAGS, REVLOGV2: REVLOGV2_FLAGS, CHANGELOGV2: CHANGELOGV2_FLAGS, } _no = lambda flags: False _yes = lambda flags: True def _from_flag(flag): return lambda flags: bool(flags & flag) FEATURES_BY_VERSION = { REVLOGV0: { b'inline': _no, b'generaldelta': _no, b'sidedata': False, b'docket': False, }, REVLOGV1: { b'inline': _from_flag(FLAG_INLINE_DATA), b'generaldelta': _from_flag(FLAG_GENERALDELTA), b'sidedata': False, b'docket': False, }, REVLOGV2: { # The point of inline-revlog is to reduce the number of files used in # the store. Using a docket defeat this purpose. So we needs other # means to reduce the number of files for revlogv2. b'inline': _no, b'generaldelta': _yes, b'sidedata': True, b'docket': True, }, CHANGELOGV2: { b'inline': _no, # General delta is useless for changelog since we don't do any delta b'generaldelta': _no, b'sidedata': True, b'docket': True, }, } SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000 ### What should be done with a cached delta and its base ? # Ignore the cache when considering candidates. # # The cached delta might be used, but the delta base will not be scheduled for # usage earlier than in "normal" order. DELTA_BASE_REUSE_NO = 0 # Prioritize trying the cached delta base # # The delta base will be tested for validy first. So that the cached deltas get # used when possible. DELTA_BASE_REUSE_TRY = 1 DELTA_BASE_REUSE_FORCE = 2