Mercurial > hg-stable
changeset 46717:913485776542
revlog: introduce v2 format
As documented in [1], this is still tentative and could be subject to change,
but we need to lay down the foundations in order to work on the next abstraction
layers.
[1] https://www.mercurial-scm.org/wiki/RevlogV2Plan
Differential Revision: https://phab.mercurial-scm.org/D9843
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 28 Jan 2021 15:28:57 +0100 |
parents | 7ed7b13fc00a |
children | fd55a9eb1507 |
files | mercurial/pure/parsers.py mercurial/requirements.py mercurial/revlog.py mercurial/revlogutils/constants.py tests/test-parseindex2.py tests/test-revlog-v2.t tests/test-revlog.t |
diffstat | 7 files changed, 88 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/pure/parsers.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/pure/parsers.py Thu Jan 28 15:28:57 2021 +0100
@@ -233,10 +233,61 @@
         return self._offsets[i]
 
 
-def parse_index2(data, inline):
+def parse_index2(data, inline, revlogv2=False):
     if not inline:
-        return IndexObject(data), None
-    return InlinedIndexObject(data, inline), (0, data)
+        cls = IndexObject2 if revlogv2 else IndexObject
+        return cls(data), None
+    cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
+    return cls(data, inline), (0, data)
+
+
+class Index2Mixin(object):
+    # 6 bytes: offset
+    # 2 bytes: flags
+    # 4 bytes: compressed length
+    # 4 bytes: uncompressed length
+    # 4 bytes: base rev
+    # 4 bytes: link rev
+    # 4 bytes: parent 1 rev
+    # 4 bytes: parent 2 rev
+    # 32 bytes: nodeid
+    # 8 bytes: sidedata offset
+    # 4 bytes: sidedata compressed length
+    # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
+    index_format = b">Qiiiiii20s12xQi20x"
+    index_size = struct.calcsize(index_format)
+    assert index_size == 96, index_size
+    null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
+
+
+class IndexObject2(Index2Mixin, IndexObject):
+    pass
+
+
+class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
+    def _inline_scan(self, lgt):
+        sidedata_length_pos = 72
+        off = 0
+        if lgt is not None:
+            self._offsets = [0] * lgt
+        count = 0
+        while off <= len(self._data) - self.index_size:
+            start = off + self.big_int_size
+            (data_size,) = struct.unpack(
+                b'>i',
+                self._data[start : start + self.int_size],
+            )
+            start = off + sidedata_length_pos
+            (side_data_size,) = struct.unpack(
+                b'>i', self._data[start : start + self.int_size]
+            )
+            if lgt is not None:
+                self._offsets[count] = off
+            count += 1
+            off += self.index_size + data_size + side_data_size
+        if off != len(self._data):
+            raise ValueError(b"corrupted data")
+        return count
 
 
 def parse_index_devel_nodemap(data, inline):
--- a/mercurial/requirements.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/requirements.py Thu Jan 28 15:28:57 2021 +0100
@@ -30,7 +30,7 @@
 
 # Increment the sub-version when the revlog v2 format changes to lock out old
 # clients.
-REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
+REVLOGV2_REQUIREMENT = b'exp-revlogv2.2'
 
 # A repository with the sparserevlog feature will have delta chains that
 # can spread over a larger span. Sparse reading cuts these large spans into
--- a/mercurial/revlog.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/revlog.py Thu Jan 28 15:28:57 2021 +0100
@@ -83,6 +83,7 @@
     storageutil,
     stringutil,
 )
+from .pure import parsers as pureparsers
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
@@ -364,6 +365,25 @@
         return p
 
 
+indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
+indexformatv2_pack = indexformatv2.pack
+
+
+class revlogv2io(object):
+    def __init__(self):
+        self.size = indexformatv2.size
+
+    def parseindex(self, data, inline):
+        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
+        return index, cache
+
+    def packentry(self, entry, node, version, rev):
+        p = indexformatv2_pack(*entry)
+        if rev == 0:
+            p = versionformat_pack(version) + p[4:]
+        return p
+
+
 NodemapRevlogIO = None
 
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
@@ -650,6 +670,8 @@
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
+        elif fmt == REVLOGV2:
+            self._io = revlogv2io()
         elif devel_nodemap:
             self._io = NodemapRevlogIO()
         elif use_rust_index:
@@ -2337,7 +2359,13 @@
             p1r,
             p2r,
             node,
+            0,
+            0,
         )
+
+        if self.version & 0xFFFF != REVLOGV2:
+            e = e[:8]
+
         self.index.append(e)
 
         entry = self._io.packentry(e, self.node, self.version, curr)
--- a/mercurial/revlogutils/constants.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/revlogutils/constants.py Thu Jan 28 15:28:57 2021 +0100
@@ -15,7 +15,6 @@
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
-# Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 # Shared across v1 and v2.
 FLAG_INLINE_DATA = 1 << 16
--- a/tests/test-parseindex2.py Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-parseindex2.py Thu Jan 28 15:28:57 2021 +0100
@@ -117,8 +117,8 @@
 )
 
 
-def parse_index2(data, inline):
-    index, chunkcache = parsers.parse_index2(data, inline)
+def parse_index2(data, inline, revlogv2=False):
+    index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2)
     return list(index), chunkcache
 
 
--- a/tests/test-revlog-v2.t Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-revlog-v2.t Thu Jan 28 15:28:57 2021 +0100
@@ -22,7 +22,7 @@
   $ cd empty-repo
   $ cat .hg/requires
   dotencode
-  exp-revlogv2.1
+  exp-revlogv2.2
   fncache
   sparserevlog
   store
--- a/tests/test-revlog.t Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-revlog.t Thu Jan 28 15:28:57 2021 +0100
@@ -22,10 +22,10 @@
 Unknown version is rejected
 
   >>> with open('.hg/store/00changelog.i', 'wb') as fh:
-  ...     fh.write(b'\x00\x00\x00\x02') and None
+  ...     fh.write(b'\x00\x00\xbe\xef') and None
 
   $ hg log
-  abort: unknown version (2) in revlog 00changelog.i
+  abort: unknown version (48879) in revlog 00changelog.i
   [50]
 
   $ cd ..