revlog: introduce v2 format
The v2 format documented in [1] is still tentative and subject to change,
but we need to lay the foundations now in order to work on the next
abstraction layers.
[1] https://www.mercurial-scm.org/wiki/RevlogV2Plan
Differential Revision: https://phab.mercurial-scm.org/D9843
--- a/mercurial/pure/parsers.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/pure/parsers.py Thu Jan 28 15:28:57 2021 +0100
@@ -233,10 +233,61 @@
return self._offsets[i]
-def parse_index2(data, inline):
+def parse_index2(data, inline, revlogv2=False):
if not inline:
- return IndexObject(data), None
- return InlinedIndexObject(data, inline), (0, data)
+ cls = IndexObject2 if revlogv2 else IndexObject
+ return cls(data), None
+ cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
+ return cls(data, inline), (0, data)
+
+
+class Index2Mixin(object):
+ # 6 bytes: offset
+ # 2 bytes: flags
+ # 4 bytes: compressed length
+ # 4 bytes: uncompressed length
+ # 4 bytes: base rev
+ # 4 bytes: link rev
+ # 4 bytes: parent 1 rev
+ # 4 bytes: parent 2 rev
+ # 32 bytes: nodeid
+ # 8 bytes: sidedata offset
+ # 4 bytes: sidedata compressed length
+ # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
+ index_format = b">Qiiiiii20s12xQi20x"
+ index_size = struct.calcsize(index_format)
+ assert index_size == 96, index_size
+ null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
+
+
+class IndexObject2(Index2Mixin, IndexObject):
+ pass
+
+
+class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
+ def _inline_scan(self, lgt):
+ sidedata_length_pos = 72
+ off = 0
+ if lgt is not None:
+ self._offsets = [0] * lgt
+ count = 0
+ while off <= len(self._data) - self.index_size:
+ start = off + self.big_int_size
+ (data_size,) = struct.unpack(
+ b'>i',
+ self._data[start : start + self.int_size],
+ )
+ start = off + sidedata_length_pos
+ (side_data_size,) = struct.unpack(
+ b'>i', self._data[start : start + self.int_size]
+ )
+ if lgt is not None:
+ self._offsets[count] = off
+ count += 1
+ off += self.index_size + data_size + side_data_size
+ if off != len(self._data):
+ raise ValueError(b"corrupted data")
+ return count
def parse_index_devel_nodemap(data, inline):
--- a/mercurial/requirements.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/requirements.py Thu Jan 28 15:28:57 2021 +0100
@@ -30,7 +30,7 @@
# Increment the sub-version when the revlog v2 format changes to lock out old
# clients.
-REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
+REVLOGV2_REQUIREMENT = b'exp-revlogv2.2'
# A repository with the sparserevlog feature will have delta chains that
# can spread over a larger span. Sparse reading cuts these large spans into
--- a/mercurial/revlog.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/revlog.py Thu Jan 28 15:28:57 2021 +0100
@@ -83,6 +83,7 @@
storageutil,
stringutil,
)
+from .pure import parsers as pureparsers
# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
@@ -364,6 +365,25 @@
return p
+indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
+indexformatv2_pack = indexformatv2.pack
+
+
+class revlogv2io(object):
+ def __init__(self):
+ self.size = indexformatv2.size
+
+ def parseindex(self, data, inline):
+ index, cache = parsers.parse_index2(data, inline, revlogv2=True)
+ return index, cache
+
+ def packentry(self, entry, node, version, rev):
+ p = indexformatv2_pack(*entry)
+ if rev == 0:
+ p = versionformat_pack(version) + p[4:]
+ return p
+
+
NodemapRevlogIO = None
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
@@ -650,6 +670,8 @@
self._io = revlogio()
if self.version == REVLOGV0:
self._io = revlogoldio()
+ elif fmt == REVLOGV2:
+ self._io = revlogv2io()
elif devel_nodemap:
self._io = NodemapRevlogIO()
elif use_rust_index:
@@ -2337,7 +2359,13 @@
p1r,
p2r,
node,
+ 0,
+ 0,
)
+
+ if self.version & 0xFFFF != REVLOGV2:
+ e = e[:8]
+
self.index.append(e)
entry = self._io.packentry(e, self.node, self.version, curr)
--- a/mercurial/revlogutils/constants.py Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/revlogutils/constants.py Thu Jan 28 15:28:57 2021 +0100
@@ -15,7 +15,6 @@
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
-# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
--- a/tests/test-parseindex2.py Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-parseindex2.py Thu Jan 28 15:28:57 2021 +0100
@@ -117,8 +117,8 @@
)
-def parse_index2(data, inline):
- index, chunkcache = parsers.parse_index2(data, inline)
+def parse_index2(data, inline, revlogv2=False):
+ index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2)
return list(index), chunkcache
--- a/tests/test-revlog-v2.t Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-revlog-v2.t Thu Jan 28 15:28:57 2021 +0100
@@ -22,7 +22,7 @@
$ cd empty-repo
$ cat .hg/requires
dotencode
- exp-revlogv2.1
+ exp-revlogv2.2
fncache
sparserevlog
store
--- a/tests/test-revlog.t Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-revlog.t Thu Jan 28 15:28:57 2021 +0100
@@ -22,10 +22,10 @@
Unknown version is rejected
>>> with open('.hg/store/00changelog.i', 'wb') as fh:
- ... fh.write(b'\x00\x00\x00\x02') and None
+ ... fh.write(b'\x00\x00\xbe\xef') and None
$ hg log
- abort: unknown version (2) in revlog 00changelog.i
+ abort: unknown version (48879) in revlog 00changelog.i
[50]
$ cd ..