mercurial/revlog.py
changeset 39877 733db72f0f54
parent 39874 14e500b58263
child 39882 f8eb71f9e3bd
--- a/mercurial/revlog.py	Wed Sep 26 12:06:44 2018 -0700
+++ b/mercurial/revlog.py	Mon Sep 24 11:27:47 2018 -0700
@@ -29,6 +29,7 @@
     nullhex,
     nullid,
     nullrev,
+    short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
@@ -260,6 +261,7 @@
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
+    node = attr.ib(default=None)
 
 # index v0:
 #  4 bytes: offset
@@ -2644,6 +2646,89 @@
                 warning=_("warning: '%s' uses revlog format %d; expected %d") %
                         (self.indexfile, version, state['expectedversion']))
 
+        state['skipread'] = set()
+
+        for rev in self:
+            node = self.node(rev)
+
+            # Verify contents. 4 cases to care about:
+            #
+            #   common: the most common case
+            #   rename: with a rename
+            #   meta: file content starts with b'\1\n', the metadata
+            #         header defined in filelog.py, but without a rename
+            #   ext: content stored externally
+            #
+            # More formally, their differences are shown below:
+            #
+            #                       | common | rename | meta  | ext
+            #  -------------------------------------------------------
+            #   flags()             | 0      | 0      | 0     | not 0
+            #   renamed()           | False  | True   | False | ?
+            #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
+            #
+            # "rawtext" means the raw text stored in revlog data, which
+            # could be retrieved by "revision(rev, raw=True)". "text"
+            # mentioned below is "revision(rev, raw=False)".
+            #
+            # There are 3 different lengths stored physically:
+            #  1. L1: rawsize, stored in revlog index
+            #  2. L2: len(rawtext), stored in revlog data
+            #  3. L3: len(text), stored in revlog data if flags==0, or
+            #     possibly somewhere else if flags!=0
+            #
+            # L1 should be equal to L2. L3 could be different from them.
+            # "text" may or may not affect commit hash depending on flag
+            # processors (see revlog.addflagprocessor).
+            #
+            #              | common  | rename | meta  | ext
+            # -------------------------------------------------
+            #    rawsize() | L1      | L1     | L1    | L1
+            #       size() | L1      | L2-LM  | L1(*) | L1 (?)
+            # len(rawtext) | L2      | L2     | L2    | L2
+            #    len(text) | L2      | L2     | L2    | L3
+            #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
+            #
+            # LM:  length of metadata, depending on rawtext
+            # (*): not ideal, see comment in filelog.size
+            # (?): may be reduced by len(meta) if the resolved content has
+            #      rename metadata
+            #
+            # Checks that need to be done:
+            #  1. length check: L1 == L2, in all cases.
+            #  2. hash check: depending on flag processor, we may need to
+            #     use either "text" (external), or "rawtext" (in revlog).
+
+            try:
+                skipflags = state.get('skipflags', 0)
+                if skipflags:
+                    skipflags &= self.flags(rev)
+
+                if skipflags:
+                    state['skipread'].add(node)
+                else:
+                    # Side-effect: read content and verify hash.
+                    self.revision(node)
+
+                l1 = self.rawsize(rev)
+                l2 = len(self.revision(node, raw=True))
+
+                if l1 != l2:
+                    yield revlogproblem(
+                        error=_('unpacked size is %d, %d expected') % (l2, l1),
+                        node=node)
+
+            except error.CensoredNodeError:
+                if state['erroroncensored']:
+                    yield revlogproblem(error=_('censored file data'),
+                                        node=node)
+                    state['skipread'].add(node)
+            except Exception as e:
+                yield revlogproblem(
+                    error=_('unpacking %s: %s') % (short(node), e),
+                    node=node)
+                state['skipread'].add(node)
+
     def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                     revisionscount=False, trackedsize=False,
                     storedsize=False):