revlogv2: track pending write in the docket and expose it to hooks
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 03 May 2021 12:35:25 +0200
changeset 47252 2219853a1503
parent 47251 4f38ada3fc26
child 47253 4abd474a10af
revlogv2: track pending write in the docket and expose it to hooks The docket is now able to write pending data. We could have used a distinct intermediate files, however keeping everything in the same file will make it simpler to keep track of the various involved files if necessary. However it might prove more complicated for streaming clone. This will be dealt with later. Note that we lifted the stderr redirection in the test since we no longer suffer from "unkown working directory parent" message. Differential Revision: https://phab.mercurial-scm.org/D10631
mercurial/changelog.py
mercurial/configitems.py
mercurial/revlog.py
mercurial/revlogutils/docket.py
tests/test-transaction-safety.t
--- a/mercurial/changelog.py	Mon May 03 12:35:14 2021 +0200
+++ b/mercurial/changelog.py	Mon May 03 12:35:25 2021 +0200
@@ -395,7 +395,6 @@
         ``concurrencychecker`` will be passed to the revlog init function, see
         the documentation there.
         """
-
         revlog.revlog.__init__(
             self,
             opener,
@@ -484,6 +483,8 @@
     def _writepending(self, tr):
         """create a file containing the unfinalized state for
         pretxnchangegroup"""
+        if self._docket:
+            return self._docket.write(tr, pending=True)
         if self._delaybuf:
             # make a temporary copy of the index
             fp1 = self._realopener(self._indexfile)
--- a/mercurial/configitems.py	Mon May 03 12:35:14 2021 +0200
+++ b/mercurial/configitems.py	Mon May 03 12:35:25 2021 +0200
@@ -1150,7 +1150,6 @@
 )
 # "out of experimental" todo list.
 #
-# * expose transaction content hooks during pre-commit validation
 # * include management of a persistent nodemap in the main docket
 # * enforce a "no-truncate" policy for mmap safety
 #      - for censoring operation
--- a/mercurial/revlog.py	Mon May 03 12:35:14 2021 +0200
+++ b/mercurial/revlog.py	Mon May 03 12:35:25 2021 +0200
@@ -529,7 +529,9 @@
             if self._initempty:
                 self._docket = docketutil.default_docket(self, header)
             else:
-                self._docket = docketutil.parse_docket(self, entry_data)
+                self._docket = docketutil.parse_docket(
+                    self, entry_data, use_pending=self._trypending
+                )
             self._indexfile = self._docket.index_filepath()
             index_data = b''
             index_size = self._docket.index_end
--- a/mercurial/revlogutils/docket.py	Mon May 03 12:35:14 2021 +0200
+++ b/mercurial/revlogutils/docket.py	Mon May 03 12:35:25 2021 +0200
@@ -19,6 +19,10 @@
 
 import struct
 
+from .. import (
+    error,
+)
+
 from . import (
     constants,
 )
@@ -29,19 +33,35 @@
 #          |   This is mandatory as docket must be compatible with the previous
 #          |   revlog index header.
 # * 8 bytes: size of index data
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'L')
+# * 8 bytes: pending size of index data
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LL')
 
 
 class RevlogDocket(object):
     """metadata associated with revlog"""
 
-    def __init__(self, revlog, version_header=None, index_end=0):
+    def __init__(
+        self,
+        revlog,
+        use_pending=False,
+        version_header=None,
+        index_end=0,
+        pending_index_end=0,
+    ):
         self._version_header = version_header
+        self._read_only = bool(use_pending)
         self._dirty = False
         self._radix = revlog.radix
         self._path = revlog._docket_file
         self._opener = revlog.opener
-        self._index_end = index_end
+        # this assert should be True as long as we have a single index filename
+        assert index_end <= pending_index_end
+        self._initial_index_end = index_end
+        self._pending_index_end = pending_index_end
+        if use_pending:
+            self._index_end = self._pending_index_end
+        else:
+            self._index_end = self._initial_index_end
 
     def index_filepath(self):
         """file path to the current index file associated to this docket"""
@@ -58,22 +78,38 @@
             self._index_end = new_size
             self._dirty = True
 
-    def write(self, transaction, stripping=False):
+    def write(self, transaction, pending=False, stripping=False):
         """write the modification of disk if any
 
         This make the new content visible to all process"""
-        if self._dirty:
+        if not self._dirty:
+            return False
+        else:
+            if self._read_only:
+                msg = b'writing read-only docket: %s'
+                msg %= self._path
+                raise error.ProgrammingError(msg)
             if not stripping:
                 # XXX we could, leverage the docket while stripping. However it
                 # is not powerfull enough at the time of this comment
                 transaction.addbackup(self._path, location=b'store')
             with self._opener(self._path, mode=b'w', atomictemp=True) as f:
-                f.write(self._serialize())
-            self._dirty = False
+                f.write(self._serialize(pending=pending))
+            # if pending we still need to the write final data eventually
+            self._dirty = pending
+            return True
 
-    def _serialize(self):
+    def _serialize(self, pending=False):
+        if pending:
+            official_index_end = self._initial_index_end
+        else:
+            official_index_end = self._index_end
+
+        # this assert should be True as long as we have a single index filename
+        assert official_index_end <= self._index_end
         data = (
             self._version_header,
+            official_index_end,
             self._index_end,
         )
         return S_HEADER.pack(*data)
@@ -88,13 +124,15 @@
     return docket
 
 
-def parse_docket(revlog, data):
+def parse_docket(revlog, data, use_pending=False):
     """given some docket data return a docket object for the given revlog"""
     header = S_HEADER.unpack(data[: S_HEADER.size])
-    version_header, index_size = header
+    version_header, index_size, pending_index_size = header
     docket = RevlogDocket(
         revlog,
+        use_pending=use_pending,
         version_header=version_header,
         index_end=index_size,
+        pending_index_end=pending_index_size,
     )
     return docket
--- a/tests/test-transaction-safety.t	Mon May 03 12:35:14 2021 +0200
+++ b/tests/test-transaction-safety.t	Mon May 03 12:35:25 2021 +0200
@@ -46,13 +46,13 @@
   $ cat << EOF > script/external.sh
   > #!/bin/sh
   > $RUNTESTDIR/testlib/wait-on-file 5 $HG_TEST_FILE_EXT_UNLOCK $HG_TEST_FILE_EXT_WAITING
-  > hg log --rev 'tip' -T 'external: {rev} {desc}\n' > $TESTTMP/output/external.out 2>/dev/null
+  > hg log --rev 'tip' -T 'external: {rev} {desc}\n' > $TESTTMP/output/external.out
   > touch $HG_TEST_FILE_EXT_DONE
   > EOF
   $ chmod +x script/external.sh
   $ cat << EOF > script/internal.sh
   > #!/bin/sh
-  > hg log --rev 'tip' -T 'internal: {rev} {desc}\n' > $TESTTMP/output/internal.out 2>/dev/null
+  > hg log --rev 'tip' -T 'internal: {rev} {desc}\n' > $TESTTMP/output/internal.out
   > $RUNTESTDIR/testlib/wait-on-file 5 $HG_TEST_FILE_EXT_DONE $HG_TEST_FILE_EXT_UNLOCK
   > EOF
   $ chmod +x script/internal.sh
@@ -124,8 +124,7 @@
   $ make_one_commit first
   pre-commit: -1 
   external: -1 
-  internal: 0 first (revlogv1 !)
-  internal: -1  (revlogv2 known-bad-output !)
+  internal: 0 first
   post-tr:  0 first
 
 #if revlogv1
@@ -150,8 +149,7 @@
   $ make_one_commit second
   pre-commit: 0 first
   external: 0 first
-  internal: 1 second (revlogv1 !)
-  internal: 0 first (revlogv2 known-bad-output !)
+  internal: 1 second
   post-tr:  1 second
 
 #if revlogv1
@@ -177,8 +175,7 @@
   pre-commit: 1 second
   warning: repository is unrelated
   external: 1 second
-  internal: 5 r3 (revlogv1 !)
-  internal: 1 second (revlogv2 known-bad-output !)
+  internal: 5 r3
   post-tr:  5 r3
 
 #if revlogv1
@@ -203,8 +200,7 @@
   $ make_one_pull 400
   pre-commit: 5 r3
   external: 5 r3
-  internal: 402 r400 (revlogv1 !)
-  internal: 5 r3 (revlogv2 known-bad-output !)
+  internal: 402 r400
   post-tr:  402 r400
 
 #if revlogv1
@@ -229,8 +225,7 @@
   $ make_one_commit third
   pre-commit: 402 r400
   external: 402 r400
-  internal: 403 third (revlogv1 !)
-  internal: 402 r400 (revlogv2 known-bad-output !)
+  internal: 403 third
   post-tr:  403 third
 
 #if revlogv1
@@ -256,8 +251,7 @@
   $ make_one_pull tip
   pre-commit: 403 third
   external: 403 third
-  internal: 503 r500 (revlogv1 !)
-  internal: 403 third (revlogv2 known-bad-output !)
+  internal: 503 r500
   post-tr:  503 r500
 
 #if revlogv1