# HG changeset patch # User Gregory Szorc # Date 1538589455 25200 # Node ID 801ccd8e67c080909b6361a1ca99d119d214a8da # Parent 8c692a6b5ad19f5d8f44ebd3a4f81d183cf438b2 revlog: clear revision cache on hash verification failure The revision cache is populated after raw revision fulltext is retrieved but before hash verification. If hash verification fails, the revision cache will be populated and subsequent operations to retrieve the invalid fulltext may return the cached fulltext instead of raising. This commit changes hash verification so it will invalidate the revision cache if the cached node fails hash verification. The side-effect is that subsequent operations to request the revision text - even the raw revision text - will always fail. The new behavior is consistent and is definitely less wrong. There is an open question of whether revision(raw=True) should validate hashes. But I'm going to punt on this problem. We can always change behavior later. And to be honest, I'm not sure we should expose raw=True on the storage interface at all. Another day... Differential Revision: https://phab.mercurial-scm.org/D4867 diff -r 8c692a6b5ad1 -r 801ccd8e67c0 mercurial/revlog.py --- a/mercurial/revlog.py Thu Sep 06 02:36:25 2018 -0400 +++ b/mercurial/revlog.py Wed Oct 03 10:57:35 2018 -0700 @@ -1659,6 +1659,15 @@ if p1 is None and p2 is None: p1, p2 = self.parents(node) if node != self.hash(text, p1, p2): + # Clear the revision cache on hash failure. The revision cache + # only stores the raw revision and clearing the cache does have + # the side-effect that we won't have a cache hit when the raw + # revision data is accessed. But this case should be rare and + # it is extra work to teach the cache about the hash + # verification state. + if self._revisioncache and self._revisioncache[0] == node: + self._revisioncache = None + revornode = rev if revornode is None: revornode = templatefilters.short(hex(node)) diff -r 8c692a6b5ad1 -r 801ccd8e67c0 mercurial/testing/storage.py --- a/mercurial/testing/storage.py Thu Sep 06 02:36:25 2018 -0400 +++ b/mercurial/testing/storage.py Wed Oct 03 10:57:35 2018 -0700 @@ -881,13 +881,14 @@ with self.assertRaises(error.StorageError): f.revision(node1) - # revision(raw=True) still verifies hashes. - # TODO this is buggy because of cache interaction. - self.assertEqual(f.revision(node1, raw=True), fulltext1) + # raw=True still verifies because there are no special storage + # settings. + with self.assertRaises(error.StorageError): + f.revision(node1, raw=True) # read() behaves like revision(). - # TODO this is buggy because of cache interaction. - f.read(node1) + with self.assertRaises(error.StorageError): + f.read(node1) # We can't test renamed() here because some backends may not require # reading/validating the fulltext to return rename metadata. @@ -931,8 +932,8 @@ with self.assertRaises(error.StorageError): f.read(node1) - # TODO this should raise error.StorageError. - f.read(node1) + with self.assertRaises(error.StorageError): + f.read(node1) def testbadnodedelta(self): f = self._makefilefn() @@ -986,7 +987,8 @@ with self.assertRaises(error.CensoredNodeError): f.revision(1) - self.assertEqual(f.revision(1, raw=True), stored1) + with self.assertRaises(error.CensoredNodeError): + f.revision(1, raw=True) with self.assertRaises(error.CensoredNodeError): f.read(1)