Mercurial > hg
changeset 48221:a32a96079e2d
dirstate-v2: initial Python parser
The dirstate-v2 file format should be supported even if Rust extensions are
not enabled. This changeset adds parsing code that is not used yet.
Differential Revision: https://phab.mercurial-scm.org/D11518
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Sun, 03 Oct 2021 13:18:03 +0200 |
parents | e7b5e8ba7cab |
children | 7e78c72ee3ea |
files | mercurial/cext/parsers.c mercurial/cext/util.h mercurial/dirstatemap.py mercurial/dirstateutils/docket.py mercurial/dirstateutils/v2.py mercurial/pure/parsers.py rust/hg-core/src/dirstate_tree/on_disk.rs |
diffstat | 7 files changed, 187 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/cext/parsers.c Fri Oct 08 13:15:22 2021 +0200 +++ b/mercurial/cext/parsers.c Sun Oct 03 13:18:03 2021 +0200 @@ -347,6 +347,33 @@ return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime); }; +static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype, + PyObject *args) +{ + dirstateItemObject *t = + PyObject_New(dirstateItemObject, &dirstateItemType); + if (!t) { + return NULL; + } + if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) { + return NULL; + } + t->mode = 0; + if (t->flags & dirstate_flag_has_meaningful_data) { + if (t->flags & dirstate_flag_mode_exec_perm) { + t->mode = 0755; + } else { + t->mode = 0644; + } + if (t->flags & dirstate_flag_mode_is_symlink) { + t->mode |= S_IFLNK; + } else { + t->mode |= S_IFREG; + } + } + return (PyObject *)t; +}; + /* This means the next status call will have to actually check its content to make sure it is correct. */ static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self) @@ -413,6 +440,8 @@ "True if the stored mtime would be ambiguous with the current time"}, {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth, METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"}, + {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth, + METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"}, {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty, METH_NOARGS, "mark a file as \"possibly dirty\""}, {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
--- a/mercurial/cext/util.h Fri Oct 08 13:15:22 2021 +0200 +++ b/mercurial/cext/util.h Sun Oct 03 13:18:03 2021 +0200 @@ -36,6 +36,8 @@ static const unsigned char dirstate_flag_p2_info = 1 << 2; static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3; static const unsigned char dirstate_flag_has_meaningful_mtime = 1 << 4; +static const unsigned char dirstate_flag_mode_exec_perm = 1 << 5; +static const unsigned char dirstate_flag_mode_is_symlink = 1 << 6; extern PyTypeObject dirstateItemType; #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
--- a/mercurial/dirstatemap.py Fri Oct 08 13:15:22 2021 +0200 +++ b/mercurial/dirstatemap.py Sun Oct 03 13:18:03 2021 +0200 @@ -20,6 +20,7 @@ from .dirstateutils import ( docket as docketmod, + v2, ) parsers = policy.importmod('parsers')
--- a/mercurial/dirstateutils/docket.py Fri Oct 08 13:15:22 2021 +0200 +++ b/mercurial/dirstateutils/docket.py Sun Oct 03 13:18:03 2021 +0200 @@ -10,14 +10,10 @@ import struct from ..revlogutils import docket as docket_mod - +from . import v2 V2_FORMAT_MARKER = b"dirstate-v2\n" -# Must match the constant of the same name in -# `rust/hg-core/src/dirstate_tree/on_disk.rs` -TREE_METADATA_SIZE = 44 - # * 12 bytes: format marker # * 32 bytes: node ID of the working directory's first parent # * 32 bytes: node ID of the working directory's second parent @@ -29,7 +25,7 @@ # Node IDs are null-padded if shorter than 32 bytes. # A data file shorter than the specified used size is corrupted (truncated) HEADER = struct.Struct( - ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE) + ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), v2.TREE_METADATA_SIZE) )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/dirstateutils/v2.py Sun Oct 03 13:18:03 2021 +0200 @@ -0,0 +1,118 @@ +# v2.py - Pure-Python implementation of the dirstate-v2 file format +# +# Copyright Mercurial Contributors +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +import struct + +from .. import policy + +parsers = policy.importmod('parsers') + + +# Must match the constant of the same name in +# `rust/hg-core/src/dirstate_tree/on_disk.rs` +TREE_METADATA_SIZE = 44 +NODE_SIZE = 43 + + +# Must match the `TreeMetadata` Rust struct in +# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there. +# +# * 4 bytes: start offset of root nodes +# * 4 bytes: number of root nodes +# * 4 bytes: total number of nodes in the tree that have an entry +# * 4 bytes: total number of nodes in the tree that have a copy source +# * 4 bytes: number of bytes in the data file that are not used anymore +# * 4 bytes: unused +# * 20 bytes: SHA-1 hash of ignore patterns +TREE_METADATA = struct.Struct('>LLLLL4s20s') + + +# Must match the `Node` Rust struct in +# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there. 
+# +# * 4 bytes: start offset of full path +# * 2 bytes: length of the full path +# * 2 bytes: length within the full path before its "base name" +# * 4 bytes: start offset of the copy source if any, or zero for no copy source +# * 2 bytes: length of the copy source if any, or unused +# * 4 bytes: start offset of child nodes +# * 4 bytes: number of child nodes +# * 4 bytes: number of descendant nodes that have an entry +# * 4 bytes: number of descendant nodes that have a "tracked" state +# * 1 byte: flags +# * 4 bytes: expected size +# * 4 bytes: mtime seconds +# * 4 bytes: mtime nanoseconds +NODE = struct.Struct('>LHHLHLLLLBlll') + + +assert TREE_METADATA_SIZE == TREE_METADATA.size +assert NODE_SIZE == NODE.size + + +def parse_dirstate(map, copy_map, data, tree_metadata): + """parse a full v2-dirstate from binary data into dictionaries: + + - map: a {path: entry} mapping that will be filled + - copy_map: a {path: copy-source} mapping that will be filled + - data: a binary blob containing the v2 nodes data + - tree_metadata: a binary blob of the top level node (from the docket) + """ + ( + root_nodes_start, + root_nodes_len, + _nodes_with_entry_count, + _nodes_with_copy_source_count, + _unreachable_bytes, + _unused, + _ignore_patterns_hash, + ) = TREE_METADATA.unpack(tree_metadata) + parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len) + + +def parse_nodes(map, copy_map, data, start, len): + """parse <len> nodes from <data> starting at offset <start> + + This is used by parse_dirstate to recursively fill `map` and `copy_map`. 
+ """ + for i in range(len): + node_start = start + NODE_SIZE * i + node_bytes = slice_with_len(data, node_start, NODE_SIZE) + ( + path_start, + path_len, + _basename_start, + copy_source_start, + copy_source_len, + children_start, + children_count, + _descendants_with_entry_count, + _tracked_descendants_count, + flags, + size, + mtime_s, + _mtime_ns, + ) = NODE.unpack(node_bytes) + + # Parse child nodes of this node recursively + parse_nodes(map, copy_map, data, children_start, children_count) + + item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s) + if not item.any_tracked: + continue + path = slice_with_len(data, path_start, path_len) + map[path] = item + if copy_source_start: + copy_map[path] = slice_with_len( + data, copy_source_start, copy_source_len + ) + + +def slice_with_len(data, start, len): + return data[start : start + len]
--- a/mercurial/pure/parsers.py Fri Oct 08 13:15:22 2021 +0200 +++ b/mercurial/pure/parsers.py Sun Oct 03 13:18:03 2021 +0200 @@ -7,6 +7,7 @@ from __future__ import absolute_import +import stat import struct import zlib @@ -43,6 +44,15 @@ # a special value used internally for `time` if the time is ambigeous AMBIGUOUS_TIME = -1 +# Bits of the `flags` byte inside a node in the file format +DIRSTATE_V2_WDIR_TRACKED = 1 << 0 +DIRSTATE_V2_P1_TRACKED = 1 << 1 +DIRSTATE_V2_P2_INFO = 1 << 2 +DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3 +DIRSTATE_V2_HAS_MTIME = 1 << 4 +DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5 +DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6 + @attr.s(slots=True, init=False) class DirstateItem(object): @@ -109,6 +119,30 @@ self._mtime = parentfiledata[2] @classmethod + def from_v2_data(cls, flags, size, mtime): + """Build a new DirstateItem object from V2 data""" + has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE) + mode = None + if has_mode_size: + assert stat.S_IXUSR == 0o100 + if flags & DIRSTATE_V2_MODE_EXEC_PERM: + mode = 0o755 + else: + mode = 0o644 + if flags & DIRSTATE_V2_MODE_IS_SYMLINK: + mode |= stat.S_IFLNK + else: + mode |= stat.S_IFREG + return cls( + wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED), + p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED), + p2_info=bool(flags & DIRSTATE_V2_P2_INFO), + has_meaningful_data=has_mode_size, + has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_MTIME), + parentfiledata=(mode, size, mtime), + ) + + @classmethod def from_v1_data(cls, state, mode, size, mtime): """Build a new DirstateItem object from V1 data
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Fri Oct 08 13:15:22 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Sun Oct 03 13:18:03 2021 +0200 @@ -31,10 +31,8 @@ pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; -/// Must match the constant of the same name in -/// `mercurial/dirstateutils/docket.py` +/// Must match constants of the same names in `mercurial/dirstateutils/v2.py` const TREE_METADATA_SIZE: usize = 44; - const NODE_SIZE: usize = 43; /// Make sure that size-affecting changes are made knowingly