--- a/mercurial/revlog.py Wed Sep 26 12:06:44 2018 -0700
+++ b/mercurial/revlog.py Mon Sep 24 11:27:47 2018 -0700
@@ -29,6 +29,7 @@
nullhex,
nullid,
nullrev,
+ short,
wdirfilenodeids,
wdirhex,
wdirid,
@@ -260,6 +261,7 @@
class revlogproblem(object):
warning = attr.ib(default=None)
error = attr.ib(default=None)
+ node = attr.ib(default=None)
# index v0:
# 4 bytes: offset
@@ -2644,6 +2646,89 @@
warning=_("warning: '%s' uses revlog format %d; expected %d") %
(self.indexfile, version, state['expectedversion']))
+ state['skipread'] = set()
+
+ for rev in self:
+ node = self.node(rev)
+
+ # Verify contents. 4 cases to care about:
+ #
+ # common: the most common case
+ # rename: with a rename
+ # meta: file content starts with b'\1\n', the metadata
+ # header defined in filelog.py, but without a rename
+ # ext: content stored externally
+ #
+ # More formally, their differences are shown below:
+ #
+ # | common | rename | meta | ext
+ # -------------------------------------------------------
+ # flags() | 0 | 0 | 0 | not 0
+ # renamed() | False | True | False | ?
+ # rawtext[0:2]=='\1\n'| False | True | True | ?
+ #
+ # "rawtext" means the raw text stored in revlog data, which
+ # could be retrieved by "revision(rev, raw=True)". "text"
+ # mentioned below is "revision(rev, raw=False)".
+ #
+ # There are 3 different lengths stored physically:
+ # 1. L1: rawsize, stored in revlog index
+ # 2. L2: len(rawtext), stored in revlog data
+ # 3. L3: len(text), stored in revlog data if flags==0, or
+ # possibly somewhere else if flags!=0
+ #
+ # L1 should be equal to L2. L3 could be different from them.
+ # "text" may or may not affect commit hash depending on flag
+ # processors (see revlog.addflagprocessor).
+ #
+ # | common | rename | meta | ext
+ # -------------------------------------------------
+ # rawsize() | L1 | L1 | L1 | L1
+ # size() | L1 | L2-LM | L1(*) | L1 (?)
+ # len(rawtext) | L2 | L2 | L2 | L2
+ # len(text) | L2 | L2 | L2 | L3
+ # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
+ #
+ # LM: length of metadata, depending on rawtext
+ # (*): not ideal, see comment in filelog.size
+ # (?): could be "- len(meta)" if the resolved content has
+ # rename metadata
+ #
+ # Checks that need to be done:
+ # 1. length check: L1 == L2, in all cases.
+ # 2. hash check: depending on flag processor, we may need to
+ # use either "text" (external), or "rawtext" (in revlog).
+
+ try:
+ skipflags = state.get('skipflags', 0)
+ if skipflags:
+ skipflags &= self.flags(rev)
+
+ if skipflags:
+ state['skipread'].add(node)
+ else:
+ # Called only for its side effect: read content and verify hash.
+ self.revision(node)
+
+ l1 = self.rawsize(rev)
+ l2 = len(self.revision(node, raw=True))
+
+ if l1 != l2:
+ yield revlogproblem(
+ error=_('unpacked size is %d, %d expected') % (l2, l1),
+ node=node)
+
+ except error.CensoredNodeError:
+ if state['erroroncensored']:
+ yield revlogproblem(error=_('censored file data'),
+ node=node)
+ state['skipread'].add(node)
+ except Exception as e:
+ yield revlogproblem(
+ error=_('unpacking %s: %s') % (short(node), e),
+ node=node)
+ state['skipread'].add(node)
+
def storageinfo(self, exclusivefiles=False, sharedfiles=False,
revisionscount=False, trackedsize=False,
storedsize=False):