rev-branch-cache: properly ignores unaligned trailing data
authorPierre-Yves David <pierre-yves.david@octobus.net>
Fri, 27 Sep 2024 15:19:10 +0200
changeset 51975 76416b6e9d9b
parent 51974 4c885d5ff132
child 51976 41b8892a2054
rev-branch-cache: properly ignores unaligned trailing data Previously, trailing data could lead to crash and would be written back to disk, disaligning all new data… This is no longer the cases. This was detected while playing with branchmap-v3 that access the rev-branch-cache much more aggressively.
mercurial/branching/rev_cache.py
tests/test-branches.t
--- a/mercurial/branching/rev_cache.py	Fri Sep 27 15:01:43 2024 +0200
+++ b/mercurial/branching/rev_cache.py	Fri Sep 27 15:19:10 2024 +0200
@@ -56,23 +56,28 @@
         self._prefix = revs
         self._rest = bytearray()
 
+    @property
+    def len_prefix(self):
+        size = len(self._prefix)
+        return size - (size % _rbcrecsize)
+
     def __len__(self):
-        return len(self._prefix) + len(self._rest)
+        return self.len_prefix + len(self._rest)
 
     def unpack_record(self, rbcrevidx):
-        if rbcrevidx < len(self._prefix):
+        if rbcrevidx < self.len_prefix:
             return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
         else:
             return unpack_from(
                 _rbcrecfmt,
                 util.buffer(self._rest),
-                rbcrevidx - len(self._prefix),
+                rbcrevidx - self.len_prefix,
             )
 
     def make_mutable(self):
-        if len(self._prefix) > 0:
+        if self.len_prefix > 0:
             entirety = bytearray()
-            entirety[:] = self._prefix
+            entirety[:] = self._prefix[: self.len_prefix]
             entirety.extend(self._rest)
             self._rest = entirety
             self._prefix = bytearray()
@@ -82,10 +87,10 @@
         del self._rest[pos:]
 
     def pack_into(self, rbcrevidx, node, branchidx):
-        if rbcrevidx < len(self._prefix):
+        if rbcrevidx < self.len_prefix:
             self.make_mutable()
         buf = self._rest
-        start_offset = rbcrevidx - len(self._prefix)
+        start_offset = rbcrevidx - self.len_prefix
         end_offset = start_offset + _rbcrecsize
 
         if len(self._rest) < end_offset:
@@ -107,14 +112,14 @@
         return self._rest.extend(extension)
 
     def slice(self, begin, end):
-        if begin < len(self._prefix):
+        if begin < self.len_prefix:
             acc = bytearray()
-            acc[:] = self._prefix[begin:end]
+            acc[:] = self._prefix[begin : min(end, self.len_prefix)]
             acc.extend(
-                self._rest[begin - len(self._prefix) : end - len(self._prefix)]
+                self._rest[begin - self.len_prefix : end - self.len_prefix]
             )
             return acc
-        return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
+        return self._rest[begin - self.len_prefix : end - self.len_prefix]
 
 
 class revbranchcache:
@@ -388,6 +393,9 @@
         if self._force_overwrite:
             start = 0
 
+        # align start on entry boundary
+        start = _rbcrecsize * (start // _rbcrecsize)
+
         with repo.cachevfs.open(_rbcrevs, b'a+b') as f:
             pass  # this make sure the file exist…
         with repo.cachevfs.open(_rbcrevs, b'r+b') as f:
--- a/tests/test-branches.t	Fri Sep 27 15:01:43 2024 +0200
+++ b/tests/test-branches.t	Fri Sep 27 15:19:10 2024 +0200
@@ -831,15 +831,30 @@
   $ mv .hg/cache/rbc-revs-v2_ .hg/cache/rbc-revs-v2
 #endif
 
-recovery from invalid cache revs file with trailing data
-  $ echo >> .hg/cache/rbc-revs-v2
+dealing with valid cache revs file but for extra trailing data
+--------------------------------------------------------------
+
+When the trailing data are smaller than a record, they are practically
+invisible to the cache and ignored. No warning is issued about them.
+
+  $ echo '42' >> .hg/cache/rbc-revs-v2
   $ rm -f .hg/cache/branch* && hg head a -T '{rev}\n' --debug
   5
-  cache/rbc-revs-v2 contains 2 unknown trailing bytes
   $ f --size .hg/cache/rbc-revs*
-  .hg/cache/rbc-revs-v2: size=162
+  .hg/cache/rbc-revs-v2: size=164
+
+When the trailing data are larger than a record, they are seens as extra
+(probably invalid) data. We warn about them when writing.
+
+  $ echo 'abracadabra!' >> .hg/cache/rbc-revs-v2
+  $ rm -f .hg/cache/branch* && hg head a -T '{rev}\n' --debug
+  5
+  cache/rbc-revs-v2 contains 17 unknown trailing bytes
+  $ f --size .hg/cache/rbc-revs*
+  .hg/cache/rbc-revs-v2: size=177
 
 recovery from invalid cache file with partial last record
+---------------------------------------------------------
   $ mv .hg/cache/rbc-revs-v2 .
   $ f -qDB 119 rbc-revs-v2 > .hg/cache/rbc-revs-v2
   $ f --size .hg/cache/rbc-revs*