revlog: clear revision cache on hash verification failure
author: Gregory Szorc <gregory.szorc@gmail.com>
Wed, 03 Oct 2018 10:57:35 -0700
changeset 40054 801ccd8e67c0
parent 40053 8c692a6b5ad1
child 40055 0a4625ffd6c0
revlog: clear revision cache on hash verification failure

The revision cache is populated after raw revision fulltext is retrieved but before hash verification. If hash verification fails, the revision cache will be populated and subsequent operations to retrieve the invalid fulltext may return the cached fulltext instead of raising.

This commit changes hash verification so it will invalidate the revision cache if the cached node fails hash verification. The side-effect is that subsequent operations to request the revision text - even the raw revision text - will always fail.

The new behavior is consistent and is definitely less wrong. There is an open question of whether revision(raw=True) should validate hashes. But I'm going to punt on this problem. We can always change behavior later. And to be honest, I'm not sure we should expose raw=True on the storage interface at all. Another day...

Differential Revision: https://phab.mercurial-scm.org/D4867
mercurial/revlog.py
mercurial/testing/storage.py
--- a/mercurial/revlog.py	Thu Sep 06 02:36:25 2018 -0400
+++ b/mercurial/revlog.py	Wed Oct 03 10:57:35 2018 -0700
@@ -1659,6 +1659,15 @@
             if p1 is None and p2 is None:
                 p1, p2 = self.parents(node)
             if node != self.hash(text, p1, p2):
+                # Clear the revision cache on hash failure. The revision cache
+                # only stores the raw revision and clearing the cache does have
+                # the side-effect that we won't have a cache hit when the raw
+                # revision data is accessed. But this case should be rare and
+                # it is extra work to teach the cache about the hash
+                # verification state.
+                if self._revisioncache and self._revisioncache[0] == node:
+                    self._revisioncache = None
+
                 revornode = rev
                 if revornode is None:
                     revornode = templatefilters.short(hex(node))
--- a/mercurial/testing/storage.py	Thu Sep 06 02:36:25 2018 -0400
+++ b/mercurial/testing/storage.py	Wed Oct 03 10:57:35 2018 -0700
@@ -881,13 +881,14 @@
         with self.assertRaises(error.StorageError):
             f.revision(node1)
 
-        # revision(raw=True) still verifies hashes.
-        # TODO this is buggy because of cache interaction.
-        self.assertEqual(f.revision(node1, raw=True), fulltext1)
+        # raw=True still verifies because there are no special storage
+        # settings.
+        with self.assertRaises(error.StorageError):
+            f.revision(node1, raw=True)
 
         # read() behaves like revision().
-        # TODO this is buggy because of cache interaction.
-        f.read(node1)
+        with self.assertRaises(error.StorageError):
+            f.read(node1)
 
         # We can't test renamed() here because some backends may not require
         # reading/validating the fulltext to return rename metadata.
@@ -931,8 +932,8 @@
         with self.assertRaises(error.StorageError):
             f.read(node1)
 
-        # TODO this should raise error.StorageError.
-        f.read(node1)
+        with self.assertRaises(error.StorageError):
+            f.read(node1)
 
     def testbadnodedelta(self):
         f = self._makefilefn()
@@ -986,7 +987,8 @@
         with self.assertRaises(error.CensoredNodeError):
             f.revision(1)
 
-        self.assertEqual(f.revision(1, raw=True), stored1)
+        with self.assertRaises(error.CensoredNodeError):
+            f.revision(1, raw=True)
 
         with self.assertRaises(error.CensoredNodeError):
             f.read(1)