Mercurial > hg-stable
changeset 47918:2174f54aab18
store: return just one filename in walk functions
Various walk functions return `(revlog_type, unencoded, encoded, size)` where
`unencoded` could be None. But no one cares about `encoded`, and everyone
expects `unencoded` to be present, except verify (because a None value can
only happen with old repo formats).
Simplify all this by either failing outright if decoding a filename
fails (instead of almost certainly failing with a type error due to
treating None as bytes), or skipping the filename but providing it in
an out argument for hg verify.
Differential Revision: https://phab.mercurial-scm.org/D11248
author | Valentin Gatien-Baron <valentin.gatienbaron@gmail.com> |
---|---|
date | Mon, 02 Aug 2021 08:05:13 -0400 |
parents | 517a2c1cb788 |
children | 31a72e5e9200 |
files | hgext/narrow/narrowcommands.py hgext/remotefilelog/contentstore.py hgext/remotefilelog/remotefilelogserver.py mercurial/repair.py mercurial/revlogutils/rewrite.py mercurial/store.py mercurial/streamclone.py mercurial/upgrade_utils/engine.py mercurial/verify.py mercurial/wireprotov2server.py tests/simplestorerepo.py |
diffstat | 11 files changed, 61 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/narrow/narrowcommands.py Sun Aug 01 10:57:21 2021 -0400 +++ b/hgext/narrow/narrowcommands.py Mon Aug 02 08:05:13 2021 -0400 @@ -289,7 +289,7 @@ repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup) todelete = [] - for t, f, f2, size in repo.store.datafiles(): + for t, f, size in repo.store.datafiles(): if f.startswith(b'data/'): file = f[5:-2] if not newmatch(file):
--- a/hgext/remotefilelog/contentstore.py Sun Aug 01 10:57:21 2021 -0400 +++ b/hgext/remotefilelog/contentstore.py Mon Aug 02 08:05:13 2021 -0400 @@ -378,7 +378,7 @@ ledger.markdataentry(self, treename, node) ledger.markhistoryentry(self, treename, node) - for t, path, encoded, size in self._store.datafiles(): + for t, path, size in self._store.datafiles(): if path[:5] != b'meta/' or path[-2:] != b'.i': continue
--- a/hgext/remotefilelog/remotefilelogserver.py Sun Aug 01 10:57:21 2021 -0400 +++ b/hgext/remotefilelog/remotefilelogserver.py Mon Aug 02 08:05:13 2021 -0400 @@ -166,24 +166,24 @@ n = util.pconvert(fp[striplen:]) d = store.decodedir(n) t = store.FILETYPE_OTHER - yield (t, d, n, st.st_size) + yield (t, d, st.st_size) if kind == stat.S_IFDIR: visit.append(fp) if scmutil.istreemanifest(repo): - for (t, u, e, s) in repo.store.datafiles(): + for (t, u, s) in repo.store.datafiles(): if u.startswith(b'meta/') and ( u.endswith(b'.i') or u.endswith(b'.d') ): - yield (t, u, e, s) + yield (t, u, s) # Return .d and .i files that do not match the shallow pattern match = state.match if match and not match.always(): - for (t, u, e, s) in repo.store.datafiles(): + for (t, u, s) in repo.store.datafiles(): f = u[5:-2] # trim data/... and .i/.d if not state.match(f): - yield (t, u, e, s) + yield (t, u, s) for x in repo.store.topfiles(): if state.noflatmf and x[1][:11] == b'00manifest.':
--- a/mercurial/repair.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/repair.py Mon Aug 02 08:05:13 2021 -0400 @@ -433,7 +433,7 @@ if scmutil.istreemanifest(repo): # This logic is safe if treemanifest isn't enabled, but also # pointless, so we skip it if treemanifest isn't enabled. - for t, unencoded, encoded, size in repo.store.datafiles(): + for t, unencoded, size in repo.store.datafiles(): if unencoded.startswith(b'meta/') and unencoded.endswith( b'00manifest.i' ):
--- a/mercurial/revlogutils/rewrite.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/revlogutils/rewrite.py Mon Aug 02 08:05:13 2021 -0400 @@ -824,7 +824,7 @@ with context(): files = list( (file_type, path) - for (file_type, path, _e, _s) in repo.store.datafiles() + for (file_type, path, _s) in repo.store.datafiles() if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG )
--- a/mercurial/store.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/store.py Mon Aug 02 08:05:13 2021 -0400 @@ -472,7 +472,7 @@ return self.path + b'/' + encodedir(f) def _walk(self, relpath, recurse): - '''yields (unencoded, encoded, size)''' + '''yields (revlog_type, unencoded, size)''' path = self.path if relpath: path += b'/' + relpath @@ -488,7 +488,7 @@ rl_type = is_revlog(f, kind, st) if rl_type is not None: n = util.pconvert(fp[striplen:]) - l.append((rl_type, decodedir(n), n, st.st_size)) + l.append((rl_type, decodedir(n), st.st_size)) elif kind == stat.S_IFDIR and recurse: visit.append(fp) l.sort() @@ -505,26 +505,32 @@ rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs) return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch) - def datafiles(self, matcher=None): + def datafiles(self, matcher=None, undecodable=None): + """Like walk, but excluding the changelog and root manifest. + + When [undecodable] is None, revlogs names that can't be + decoded cause an exception. When it is provided, it should + be a list and the filenames that can't be decoded are added + to it instead. 
This is very rarely needed.""" files = self._walk(b'data', True) + self._walk(b'meta', True) - for (t, u, e, s) in files: - yield (FILEFLAGS_FILELOG | t, u, e, s) + for (t, u, s) in files: + yield (FILEFLAGS_FILELOG | t, u, s) def topfiles(self): # yield manifest before changelog files = reversed(self._walk(b'', False)) - for (t, u, e, s) in files: + for (t, u, s) in files: if u.startswith(b'00changelog'): - yield (FILEFLAGS_CHANGELOG | t, u, e, s) + yield (FILEFLAGS_CHANGELOG | t, u, s) elif u.startswith(b'00manifest'): - yield (FILEFLAGS_MANIFESTLOG | t, u, e, s) + yield (FILEFLAGS_MANIFESTLOG | t, u, s) else: - yield (FILETYPE_OTHER | t, u, e, s) + yield (FILETYPE_OTHER | t, u, s) def walk(self, matcher=None): """return file related to data storage (ie: revlogs) - yields (file_type, unencoded, encoded, size) + yields (file_type, unencoded, size) if a matcher is passed, storage files of only those tracked paths are passed with matches the matcher @@ -574,15 +580,20 @@ # However that might change so we should probably add a test and encoding # decoding for it too. 
see issue6548 - def datafiles(self, matcher=None): - for t, a, b, size in super(encodedstore, self).datafiles(): + def datafiles(self, matcher=None, undecodable=None): + for t, f1, size in super(encodedstore, self).datafiles(): try: - a = decodefilename(a) + f2 = decodefilename(f1) except KeyError: - a = None - if a is not None and not _matchtrackedpath(a, matcher): + if undecodable is None: + msg = _(b'undecodable revlog name %s') % f1 + raise error.StorageError(msg) + else: + undecodable.append(f1) + continue + if not _matchtrackedpath(f2, matcher): continue - yield t, a, b, size + yield t, f2, size def join(self, f): return self.path + b'/' + encodefilename(f) @@ -770,7 +781,7 @@ def getsize(self, path): return self.rawvfs.stat(path).st_size - def datafiles(self, matcher=None): + def datafiles(self, matcher=None, undecodable=None): for f in sorted(self.fncache): if not _matchtrackedpath(f, matcher): continue @@ -779,7 +790,7 @@ t = revlog_type(f) assert t is not None, f t |= FILEFLAGS_FILELOG - yield t, f, ef, self.getsize(ef) + yield t, f, self.getsize(ef) except OSError as err: if err.errno != errno.ENOENT: raise
--- a/mercurial/streamclone.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/streamclone.py Mon Aug 02 08:05:13 2021 -0400 @@ -248,7 +248,7 @@ # Get consistent snapshot of repo, lock during scan. with repo.lock(): repo.ui.debug(b'scanning\n') - for file_type, name, ename, size in _walkstreamfiles(repo): + for file_type, name, size in _walkstreamfiles(repo): if size: entries.append((name, size)) total_bytes += size @@ -650,7 +650,7 @@ if includes or excludes: matcher = narrowspec.match(repo.root, includes, excludes) - for rl_type, name, ename, size in _walkstreamfiles(repo, matcher): + for rl_type, name, size in _walkstreamfiles(repo, matcher): if size: ft = _fileappend if rl_type & store.FILEFLAGS_VOLATILE:
--- a/mercurial/upgrade_utils/engine.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/upgrade_utils/engine.py Mon Aug 02 08:05:13 2021 -0400 @@ -201,7 +201,7 @@ # Perform a pass to collect metadata. This validates we can open all # source files and allows a unified progress bar to be displayed. - for rl_type, unencoded, encoded, size in alldatafiles: + for rl_type, unencoded, size in alldatafiles: if not rl_type & store.FILEFLAGS_REVLOG_MAIN: continue
--- a/mercurial/verify.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/verify.py Mon Aug 02 08:05:13 2021 -0400 @@ -395,12 +395,13 @@ storefiles = set() subdirs = set() revlogv1 = self.revlogv1 - for t, f, f2, size in repo.store.datafiles(): - if not f: - self._err(None, _(b"cannot decode filename '%s'") % f2) - elif (size > 0 or not revlogv1) and f.startswith(b'meta/'): + undecodable = [] + for t, f, size in repo.store.datafiles(undecodable=undecodable): + if (size > 0 or not revlogv1) and f.startswith(b'meta/'): storefiles.add(_normpath(f)) subdirs.add(os.path.dirname(f)) + for f in undecodable: + self._err(None, _(b"cannot decode filename '%s'") % f) subdirprogress = ui.makeprogress( _(b'checking'), unit=_(b'manifests'), total=len(subdirs) ) @@ -459,11 +460,12 @@ ui.status(_(b"checking files\n")) storefiles = set() - for rl_type, f, f2, size in repo.store.datafiles(): - if not f: - self._err(None, _(b"cannot decode filename '%s'") % f2) - elif (size > 0 or not revlogv1) and f.startswith(b'data/'): + undecodable = [] + for t, f, size in repo.store.datafiles(undecodable=undecodable): + if (size > 0 or not revlogv1) and f.startswith(b'data/'): storefiles.add(_normpath(f)) + for f in undecodable: + self._err(None, _(b"cannot decode filename '%s'") % f) state = { # TODO this assumes revlog storage for changelog.
--- a/mercurial/wireprotov2server.py Sun Aug 01 10:57:21 2021 -0400 +++ b/mercurial/wireprotov2server.py Mon Aug 02 08:05:13 2021 -0400 @@ -1579,7 +1579,7 @@ # TODO this is a bunch of storage layer interface abstractions because # it assumes revlogs. - for rl_type, name, encodedname, size in topfiles: + for rl_type, name, size in topfiles: # XXX use the `rl_type` for that if b'changelog' in files and name.startswith(b'00changelog'): pass
--- a/tests/simplestorerepo.py Sun Aug 01 10:57:21 2021 -0400 +++ b/tests/simplestorerepo.py Mon Aug 02 08:05:13 2021 -0400 @@ -665,20 +665,24 @@ class simplestore(store.encodedstore): - def datafiles(self): + def datafiles(self, undecodable=None): for x in super(simplestore, self).datafiles(): yield x # Supplement with non-revlog files. extrafiles = self._walk('data', True, filefilter=issimplestorefile) - for unencoded, encoded, size in extrafiles: + for f1, size in extrafiles: try: - unencoded = store.decodefilename(unencoded) + f2 = store.decodefilename(f1) except KeyError: - unencoded = None + if undecodable is None: + raise error.StorageError(b'undecodable revlog name %s' % f1) + else: + undecodable.append(f1) + continue - yield unencoded, encoded, size + yield f2, size def reposetup(ui, repo):