# HG changeset patch # User Martin von Zweigbergk # Date 1485197335 28800 # Node ID 5249b6470de9fbb4da7c32506e4560abc9c24751 # Parent 01512564ddd5770d4d3f0c77597f64be8c80aba7 verify: replace _validpath() by matcher The verifier calls out to _validpath() to check whether it should verify a given path, and the narrowhg extension overrides _validpath() to tell the verifier to skip paths that are outside the narrow scope. In treemanifest repos, the verifier calls the same function to check whether it should visit a directory. However, the decision to visit a directory is different from the condition that it's a matching path, and narrowhg was working around this by returning True from its _validpath() override if *either* was true. Similar to how one can do "hg files -I foo/bar/ -X foo/" (making the include pointless), narrowhg can be configured to track the same set of paths (including foo/bar/ while excluding foo/). In that case match("foo/bar/baz") would be false, but match.visitdir("foo/bar/baz") turns out to be true, causing verify to fail. This may seem like a bug in visitdir(), but it's explicitly documented to be undefined for subdirectories of excluded directories. When using treemanifests, the walk would not descend into foo/, so verification would pass. However, when using flat manifests, there is no recursive directory walk and the file path "foo/bar/baz" would be passed to _validpath() without "foo/" (actually without the slash) being passed first. As explained above, narrowhg's _validpath() override would return True for the file path (because visitdir() is true for it) and "hg verify" would fail. Replacing the _validpath() function with a matcher seems like the obvious fix. Narrowhg can then pass in its own matcher and not have to conflate the two matching functions (for dirs and files). I think it also makes the code clearer. diff -r 01512564ddd5 -r 5249b6470de9 mercurial/verify.py --- a/mercurial/verify.py Wed Feb 01 08:47:27 2017 -0800 +++ b/mercurial/verify.py Mon Jan 23 10:48:55 2017 -0800 @@ -18,6 +18,7 @@ from . 
import ( error, revlog, + scmutil, util, ) @@ -32,21 +33,13 @@ f = f.replace('//', '/') return f -def _validpath(repo, path): - """Returns False if a path should NOT be treated as part of a repo. - - For all in-core cases, this returns True, as we have no way for a - path to be mentioned in the history but not actually be - relevant. For narrow clones, this is important because many - filelogs will be missing, and changelog entries may mention - modified files that are outside the narrow scope. - """ - return True - class verifier(object): - def __init__(self, repo): + # The match argument is always None in hg core, but e.g. the narrowhg + # extension will pass in a matcher here. + def __init__(self, repo, match=None): self.repo = repo.unfiltered() self.ui = repo.ui + self.match = match or scmutil.matchall(repo) self.badrevs = set() self.errors = 0 self.warnings = 0 @@ -170,6 +163,7 @@ def _verifychangelog(self): ui = self.ui repo = self.repo + match = self.match cl = repo.changelog ui.status(_("checking changesets\n")) @@ -189,7 +183,7 @@ mflinkrevs.setdefault(changes[0], []).append(i) self.refersmf = True for f in changes[3]: - if _validpath(repo, f): + if match(f): filelinkrevs.setdefault(_normpath(f), []).append(i) except Exception as inst: self.refersmf = True @@ -201,6 +195,7 @@ progress=None): repo = self.repo ui = self.ui + match = self.match mfl = self.repo.manifestlog mf = mfl._revlog.dirlog(dir) @@ -243,12 +238,14 @@ elif f == "/dev/null": # ignore this in very old repos continue fullpath = dir + _normpath(f) - if not _validpath(repo, fullpath): - continue if fl == 't': + if not match.visitdir(fullpath): + continue subdirnodes.setdefault(fullpath + '/', {}).setdefault( fn, []).append(lr) else: + if not match(fullpath): + continue filenodes.setdefault(fullpath, {}).setdefault(fn, lr) except Exception as inst: self.exc(lr, _("reading delta %s") % short(n), inst, label)