revlog: introduce v2 format
author Raphaël Gomès <rgomes@octobus.net>
Thu, 28 Jan 2021 15:28:57 +0100
changeset 46704 913485776542
parent 46677 7ed7b13fc00a
child 46705 fd55a9eb1507
revlog: introduce v2 format

As documented in [1], this is still tentative and could be subject to change, but we need to lay down the foundations in order to work on the next abstraction layers.

[1] https://www.mercurial-scm.org/wiki/RevlogV2Plan

Differential Revision: https://phab.mercurial-scm.org/D9843
mercurial/pure/parsers.py
mercurial/requirements.py
mercurial/revlog.py
mercurial/revlogutils/constants.py
tests/test-parseindex2.py
tests/test-revlog-v2.t
tests/test-revlog.t
--- a/mercurial/pure/parsers.py	Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/pure/parsers.py	Thu Jan 28 15:28:57 2021 +0100
@@ -233,10 +233,61 @@
         return self._offsets[i]
 
 
-def parse_index2(data, inline):
+def parse_index2(data, inline, revlogv2=False):
     if not inline:
-        return IndexObject(data), None
-    return InlinedIndexObject(data, inline), (0, data)
+        cls = IndexObject2 if revlogv2 else IndexObject
+        return cls(data), None
+    cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
+    return cls(data, inline), (0, data)
+
+
+class Index2Mixin(object):
+    #  6 bytes: offset
+    #  2 bytes: flags
+    #  4 bytes: compressed length
+    #  4 bytes: uncompressed length
+    #  4 bytes: base rev
+    #  4 bytes: link rev
+    #  4 bytes: parent 1 rev
+    #  4 bytes: parent 2 rev
+    # 32 bytes: nodeid
+    #  8 bytes: sidedata offset
+    #  4 bytes: sidedata compressed length
+    #  20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
+    index_format = b">Qiiiiii20s12xQi20x"
+    index_size = struct.calcsize(index_format)
+    assert index_size == 96, index_size
+    null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
+
+
+class IndexObject2(Index2Mixin, IndexObject):
+    pass
+
+
+class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
+    def _inline_scan(self, lgt):
+        sidedata_length_pos = 72
+        off = 0
+        if lgt is not None:
+            self._offsets = [0] * lgt
+        count = 0
+        while off <= len(self._data) - self.index_size:
+            start = off + self.big_int_size
+            (data_size,) = struct.unpack(
+                b'>i',
+                self._data[start : start + self.int_size],
+            )
+            start = off + sidedata_length_pos
+            (side_data_size,) = struct.unpack(
+                b'>i', self._data[start : start + self.int_size]
+            )
+            if lgt is not None:
+                self._offsets[count] = off
+            count += 1
+            off += self.index_size + data_size + side_data_size
+        if off != len(self._data):
+            raise ValueError(b"corrupted data")
+        return count
 
 
 def parse_index_devel_nodemap(data, inline):
--- a/mercurial/requirements.py	Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/requirements.py	Thu Jan 28 15:28:57 2021 +0100
@@ -30,7 +30,7 @@
 
 # Increment the sub-version when the revlog v2 format changes to lock out old
 # clients.
-REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
+REVLOGV2_REQUIREMENT = b'exp-revlogv2.2'
 
 # A repository with the sparserevlog feature will have delta chains that
 # can spread over a larger span. Sparse reading cuts these large spans into
--- a/mercurial/revlog.py	Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/revlog.py	Thu Jan 28 15:28:57 2021 +0100
@@ -83,6 +83,7 @@
     storageutil,
     stringutil,
 )
+from .pure import parsers as pureparsers
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
@@ -364,6 +365,25 @@
         return p
 
 
+indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
+indexformatv2_pack = indexformatv2.pack
+
+
+class revlogv2io(object):
+    def __init__(self):
+        self.size = indexformatv2.size
+
+    def parseindex(self, data, inline):
+        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
+        return index, cache
+
+    def packentry(self, entry, node, version, rev):
+        p = indexformatv2_pack(*entry)
+        if rev == 0:
+            p = versionformat_pack(version) + p[4:]
+        return p
+
+
 NodemapRevlogIO = None
 
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
@@ -650,6 +670,8 @@
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
+        elif fmt == REVLOGV2:
+            self._io = revlogv2io()
         elif devel_nodemap:
             self._io = NodemapRevlogIO()
         elif use_rust_index:
@@ -2337,7 +2359,13 @@
             p1r,
             p2r,
             node,
+            0,
+            0,
         )
+
+        if self.version & 0xFFFF != REVLOGV2:
+            e = e[:8]
+
         self.index.append(e)
 
         entry = self._io.packentry(e, self.node, self.version, curr)
--- a/mercurial/revlogutils/constants.py	Fri Feb 12 16:13:34 2021 -0800
+++ b/mercurial/revlogutils/constants.py	Thu Jan 28 15:28:57 2021 +0100
@@ -15,7 +15,6 @@
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
-# Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 # Shared across v1 and v2.
 FLAG_INLINE_DATA = 1 << 16
--- a/tests/test-parseindex2.py	Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-parseindex2.py	Thu Jan 28 15:28:57 2021 +0100
@@ -117,8 +117,8 @@
 )
 
 
-def parse_index2(data, inline):
-    index, chunkcache = parsers.parse_index2(data, inline)
+def parse_index2(data, inline, revlogv2=False):
+    index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2)
     return list(index), chunkcache
 
 
--- a/tests/test-revlog-v2.t	Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-revlog-v2.t	Thu Jan 28 15:28:57 2021 +0100
@@ -22,7 +22,7 @@
   $ cd empty-repo
   $ cat .hg/requires
   dotencode
-  exp-revlogv2.1
+  exp-revlogv2.2
   fncache
   sparserevlog
   store
--- a/tests/test-revlog.t	Fri Feb 12 16:13:34 2021 -0800
+++ b/tests/test-revlog.t	Thu Jan 28 15:28:57 2021 +0100
@@ -22,10 +22,10 @@
 Unknown version is rejected
 
   >>> with open('.hg/store/00changelog.i', 'wb') as fh:
-  ...     fh.write(b'\x00\x00\x00\x02') and None
+  ...     fh.write(b'\x00\x00\xbe\xef') and None
 
   $ hg log
-  abort: unknown version (2) in revlog 00changelog.i
+  abort: unknown version (48879) in revlog 00changelog.i
   [50]
 
   $ cd ..