view mercurial/verify.py @ 45987:8b99c473aae2

copies-rust: move is_ancestor caching within the rust code Now that the OrdMap merging is fast, smaller things start to matters. We move the caching of `is_ancestor` call within the Rust code. This avoid round-trip to Python and help us to shave more time on our slower case: Repo Cases Source-Rev Dest-Rev Old-Time New-Time Difference Factor ------------------------------------------------------------------------------------------------------------------------------------ pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 2.780174 s, 2.137894 s, -0.642280 s, × 0.7690 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 9.843481 s, 8.100385 s, -1.743096 s, × 0.8229 Note: I would happily have used native code for ancestors computation, however I failed (did not tried hard) to created a rust version that goes as fast as the current C version. Below are full tables for: - this change compared to the previous change - this change compared to filelog performance Repo Cases Source-Rev Dest-Rev Old-Time New-Time Difference Factor ------------------------------------------------------------------------------------------------------------------------------------ mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 0.000049 s, 0.000047 s, -0.000002 s, × 0.9592 mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 0.000182 s, 0.000181 s, -0.000001 s, × 0.9945 mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 0.005872 s, 0.005852 s, -0.000020 s, × 0.9966 pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 0.000229 s, 0.000229 s, +0.000000 s, × 1.0000 pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 0.000058 s, 0.000058 s, +0.000000 s, × 1.0000 pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 0.000148 s, 0.000146 s, -0.000002 s, × 0.9865 pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 0.001205 s, 0.001206 s, +0.000001 s, × 1.0008 pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 0.025662 s, 0.025275 s, -0.000387 s, × 0.9849 pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 0.080113 s, 0.080303 s, +0.000190 s, × 1.0024 pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 0.153030 s, 0.152641 s, -0.000389 s, × 0.9975 pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 0.098774 s, 0.099107 s, +0.000333 s, × 1.0034 pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 2.780174 s, 2.137894 s, -0.642280 s, × 0.7690 pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 0.022218 s, 0.022202 s, -0.000016 s, × 0.9993 pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 0.252125 s, 0.228946 s, -0.023179 s, × 0.9081 netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 0.000186 s, 0.000186 s, +0.000000 s, × 1.0000 netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 0.000133 s, 0.000133 s, +0.000000 s, × 1.0000 netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 0.000320 s, 0.000320 s, +0.000000 s, × 1.0000 netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 0.001336 s, 0.001339 s, +0.000003 s, × 1.0022 netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 0.015573 s, 0.015694 s, +0.000121 s, × 1.0078 netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 0.018667 s, 0.018457 s, -0.000210 s, × 0.9888 netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 0.112534 s, 0.111691 s, -0.000843 s, × 0.9925 netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 1.231869 s, 1.166017 s, -0.065852 s, × 0.9465 mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 0.000197 s, 0.000197 s, +0.000000 s, × 1.0000 mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 0.000637 s, 0.000626 s, -0.000011 s, × 0.9827 mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 0.000303 s, 0.000303 s, +0.000000 s, × 1.0000 mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 0.001663 s, 0.001679 s, +0.000016 s, × 1.0096 mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 0.007008 s, 0.006947 s, -0.000061 s, × 0.9913 mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 0.127385 s, 0.133070 s, +0.005685 s, × 1.0446 mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.008740 s, 0.008705 s, -0.000035 s, × 0.9960 mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.005783 s, 0.005913 s, +0.000130 s, × 1.0225 mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 0.102184 s, 0.101373 s, -0.000811 s, × 0.9921 mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 0.046220 s, 0.046526 s, +0.000306 s, × 1.0066 mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 0.315271 s, 0.313954 s, -0.001317 s, × 0.9958 mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 3.478747 s, 3.367395 s, -0.111352 s, × 0.9680 mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 4.766435 s, 4.691820 s, -0.074615 s, × 0.9843 mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 0.001214 s, 0.001199 s, -0.000015 s, × 0.9876 mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 0.001221 s, 0.001216 s, -0.000005 s, × 0.9959 mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 0.000613 s, 0.000613 s, +0.000000 s, × 1.0000 mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 0.001904 s, 0.001906 s, +0.000002 s, × 1.0011 mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 0.093000 s, 0.092766 s, -0.000234 s, × 0.9975 mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 0.132194 s, 0.136074 s, +0.003880 s, × 1.0294 mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.009069 s, 0.009067 s, -0.000002 s, × 0.9998 mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.006169 s, 0.006243 s, +0.000074 s, × 1.0120 mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 0.115540 s, 0.114463 s, -0.001077 s, × 0.9907 mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 0.435381 s, 0.433683 s, -0.001698 s, × 0.9961 mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 0.415461 s, 0.411278 s, -0.004183 s, × 0.9899 mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 0.155946 s, 0.155133 s, -0.000813 s, × 0.9948 mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 0.048521 s, 0.048933 s, +0.000412 s, × 1.0085 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 9.843481 s, 8.100385 s, -1.743096 s, × 0.8229 mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 1.465128 s, 1.446720 s, -0.018408 s, × 0.9874 mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 1.374283 s, 1.369537 s, -0.004746 s, × 0.9965 mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 5.255158 s, 5.186079 s, -0.069079 s, × 0.9869 Repo Case Source-Rev Dest-Rev filelog sidedata Difference Factor -------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 0.000892 s, 0.000047 s, -0.000845 s, × 0.052691 mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 0.001823 s, 0.000181 s, -0.001642 s, × 0.099287 mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 0.018063 s, 0.005852 s, -0.012211 s, × 0.323977 pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 0.001505 s, 0.000229 s, -0.001276 s, × 0.152159 pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 0.205895 s, 0.000058 s, -0.205837 s, × 0.000282 pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 0.017021 s, 0.000146 s, -0.016875 s, × 0.008578 pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 0.019422 s, 0.001206 s, -0.018216 s, × 0.062095 pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 0.767740 s, 0.025275 s, -0.742465 s, × 0.032921 pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 1.188515 s, 0.080303 s, -1.108212 s, × 0.067566 pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 1.251968 s, 0.152641 s, -1.099327 s, × 0.121921 pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 1.616799 s, 0.099107 s, -1.517692 s, × 0.061298 pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 0.001057 s, 2.137894 s, +2.136837 s, × 2022.605487 pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 1.069485 s, 0.022202 s, -1.047283 s, × 0.020760 pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 1.350162 s, 0.228946 s, -1.121216 s, × 0.169569 netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 0.028008 s, 0.000186 s, -0.027822 s, × 0.006641 netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 0.132281 s, 0.000133 s, -0.132148 s, × 0.001005 netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 0.025311 s, 0.000320 s, -0.024991 s, × 0.012643 netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 0.052957 s, 0.001339 s, -0.051618 s, × 0.025285 netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 0.038011 s, 0.015694 s, -0.022317 s, × 0.412880 netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 0.198639 s, 0.018457 s, -0.180182 s, × 0.092917 netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 0.955713 s, 0.111691 s, -0.844022 s, × 0.116867 netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 3.838886 s, 1.166017 s, -2.672869 s, × 0.303738 mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 0.024548 s, 0.000197 s, -0.024351 s, × 0.008025 mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 0.143394 s, 0.000626 s, -0.142768 s, × 0.004366 mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 0.026046 s, 0.000303 s, -0.025743 s, × 0.011633 mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 0.085440 s, 0.001679 s, -0.083761 s, × 0.019651 mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 0.195656 s, 0.006947 s, -0.188709 s, × 0.035506 mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 2.190874 s, 0.133070 s, -2.057804 s, × 0.060738 mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.090208 s, 0.008705 s, -0.081503 s, × 0.096499 mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.747367 s, 0.005913 s, -0.741454 s, × 0.007912 mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 1.152863 s, 0.101373 s, -1.051490 s, × 0.087932 mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 6.598336 s, 0.046526 s, -6.551810 s, × 0.007051 mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 3.255015 s, 0.313954 s, -2.941061 s, × 0.096452 mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 15.668041 s, 3.367395 s, -12.300646 s, × 0.214921 mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 20.439638 s, 4.691820 s, -15.747818 s, × 0.229545 mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 0.080923 s, 0.001199 s, -0.079724 s, × 0.014817 mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 0.498456 s, 0.001216 s, -0.497240 s, × 0.002440 mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 0.020798 s, 0.000613 s, -0.020185 s, × 0.029474 mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 0.226930 s, 0.001906 s, -0.225024 s, × 0.008399 mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1.113005 s, 0.092766 s, -1.020239 s, × 0.083347 mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 2.230671 s, 0.136074 s, -2.094597 s, × 0.061001 mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.089672 s, 0.009067 s, -0.080605 s, × 0.101113 mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.740221 s, 0.006243 s, -0.733978 s, × 0.008434 mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 1.185881 s, 0.114463 s, -1.071418 s, × 0.096521 mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 0.086072 s, 0.433683 s, +0.347611 s, × 5.038607 mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 0.081321 s, 0.411278 s, +0.329957 s, × 5.057464 mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 7.528370 s, 0.155133 s, -7.373237 s, × 0.020606 mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 6.757368 s, 0.048933 s, -6.708435 s, × 0.007241 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 7.643752 s, 8.100385 s, +0.456633 s, × 1.059739 mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 9.704242 s, 1.446720 s, -8.257522 s, × 0.149081 mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 0.092845 s, killed mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 26.626870 s, 1.369537 s, -25.257333 s, × 0.051434 mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 0.092953 s, killed mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 0.227131 s, killed mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 18.884666 s, 5.186079 s, -13.698587 s, × 0.274619 mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 21.451622 s, killed mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 25.152558 s, killed Differential Revision: https://phab.mercurial-scm.org/D9303
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 02 Oct 2020 15:41:31 +0200
parents e77b57e09bfa
children ed0899e01628
line wrap: on
line source

# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import (
    nullid,
    short,
)

from . import (
    error,
    pycompat,
    revlog,
    util,
)

VERIFY_DEFAULT = 0
VERIFY_FULL = 1


def verify(repo, level=None):
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()


def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f


class verifier(object):
    def __init__(self, repo, level=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:        the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [
                            l
                            for l in linkrevs
                            if self.lrugetctx(l)[f].filenode() == node
                        ]
                    except Exception:
                        pass
                self._warn(
                    _(b" (expected %s)")
                    % b" ".join(map(pycompat.bytestr, linkrevs))
                )
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(
                    lr,
                    _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                    f,
                )
            if p2 not in seen and p2 != nullid:
                self._err(
                    lr,
                    _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                    f,
                )
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(
                _(
                    b'hint: run "hg debugrebuildfncache" to recover from '
                    b'corrupt fncache\n'
                )
            )
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(
                    _(b"(first damaged changeset appears to be %d)\n")
                    % min(self.badrevs)
                )
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node refered in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(
                    lr,
                    _(b"%s not in parent-directory manifest") % short(n),
                    label,
                )
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                            fn, []
                        ).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    self._exc(
                        lr,
                        _(b"reading full manifest %s") % short(n),
                        inst,
                        label,
                    )

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog.version & 0xFFFF,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions