comparison mercurial/manifest.py @ 28203:7297e9e13a8a

verify: check directory manifests. In repos with treemanifests, there is no specific verification of directory manifest revlogs. 'hg verify' simply collects all file nodes by reading each manifest delta. With treemanifests, that means calling manifest._slowreaddelta(). If there are missing revlog entries in a subdirectory revlog, 'hg verify' will simply report the exception that occurred while trying to read the root manifest: "manifest@0: reading delta 1700e2e92882: meta/b/00manifest.i@67688a370455: no node". This patch changes the verify code to load only the root manifest at first and verify all revisions of it, then verify all revisions of each direct subdirectory, and so on, recursively. The above message becomes "b/@0: parent-directory manifest refers to unknown revision 67688a370455". Since the new algorithm reads a single revlog at a time and in order, 'hg verify' on a treemanifest version of the hg core repo goes from ~50s to ~14s. As expected, there is no significant difference on a repo with flat manifests.
author Martin von Zweigbergk <martinvonz@google.com>
date Sun, 07 Feb 2016 21:13:24 -0800
parents 2df7f5c09c34
children 8ab91d9290ce
comparison
equal deleted inserted replaced
28202:a4692267bc2d 28203:7297e9e13a8a
322 c._lm = self._lm.copy() 322 c._lm = self._lm.copy()
323 return c 323 return c
324 324
325 def iteritems(self): 325 def iteritems(self):
326 return (x[:2] for x in self._lm.iterentries()) 326 return (x[:2] for x in self._lm.iterentries())
327
328 def iterentries(self):
329 return self._lm.iterentries()
327 330
328 def text(self, usemanifestv2=False): 331 def text(self, usemanifestv2=False):
329 if usemanifestv2: 332 if usemanifestv2:
330 return _textv2(self._lm.iterentries()) 333 return _textv2(self._lm.iterentries())
331 else: 334 else:
918 if self._treeinmem: 921 if self._treeinmem:
919 return treemanifest(self._dir, data) 922 return treemanifest(self._dir, data)
920 return manifestdict(data) 923 return manifestdict(data)
921 924
922 def dirlog(self, dir): 925 def dirlog(self, dir):
923 assert self._treeondisk 926 if dir:
927 assert self._treeondisk
924 if dir not in self._dirlogcache: 928 if dir not in self._dirlogcache:
925 self._dirlogcache[dir] = manifest(self.opener, dir, 929 self._dirlogcache[dir] = manifest(self.opener, dir,
926 self._dirlogcache) 930 self._dirlogcache)
927 return self._dirlogcache[dir] 931 return self._dirlogcache[dir]
928 932
942 if self._usemanifestv2 or self._treeondisk: 946 if self._usemanifestv2 or self._treeondisk:
943 return self._slowreaddelta(node) 947 return self._slowreaddelta(node)
944 r = self.rev(node) 948 r = self.rev(node)
945 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r)) 949 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
946 return self._newmanifest(d) 950 return self._newmanifest(d)
951
952 def readshallowdelta(self, node):
953 '''For flat manifests, this is the same as readdelta(). For
954 treemanifests, this will read the delta for this revlog's directory,
955 without recursively reading subdirectory manifests. Instead, any
956 subdirectory entry will be reported as it appears in the manifests, i.e.
957 the subdirectory will be reported among files and distinguished only by
958 its 't' flag.'''
959 if not self._treeondisk:
960 return self.readdelta(node)
961 if self._usemanifestv2:
962 raise error.Abort(
963 "readshallowdelta() not implemented for manifestv2")
964 r = self.rev(node)
965 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
966 return manifestdict(d)
947 967
948 def readfast(self, node): 968 def readfast(self, node):
949 '''use the faster of readdelta or read 969 '''use the faster of readdelta or read
950 970
951 This will return a manifest which is either only the files 971 This will return a manifest which is either only the files