store: return just one filename in walk functions
Various walk functions return `(revlog_type, decoded, encoded, size)`, where
`decoded` could be None. But no one cares about `encoded`, and every caller
expects `decoded` to be present, except verify (because a missing decoded
name can only happen with old repo formats).
Simplify all this by either failing outright if decoding a filename
fails (instead of almost certainly failing with a type error due to
treating None as bytes), or skipping the filename but reporting it
through an out argument for hg verify.
Differential Revision: https://phab.mercurial-scm.org/D11248
--- a/hgext/narrow/narrowcommands.py Sun Aug 01 10:57:21 2021 -0400
+++ b/hgext/narrow/narrowcommands.py Mon Aug 02 08:05:13 2021 -0400
@@ -289,7 +289,7 @@
repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
todelete = []
- for t, f, f2, size in repo.store.datafiles():
+ for t, f, size in repo.store.datafiles():
if f.startswith(b'data/'):
file = f[5:-2]
if not newmatch(file):
--- a/hgext/remotefilelog/contentstore.py Sun Aug 01 10:57:21 2021 -0400
+++ b/hgext/remotefilelog/contentstore.py Mon Aug 02 08:05:13 2021 -0400
@@ -378,7 +378,7 @@
ledger.markdataentry(self, treename, node)
ledger.markhistoryentry(self, treename, node)
- for t, path, encoded, size in self._store.datafiles():
+ for t, path, size in self._store.datafiles():
if path[:5] != b'meta/' or path[-2:] != b'.i':
continue
--- a/hgext/remotefilelog/remotefilelogserver.py Sun Aug 01 10:57:21 2021 -0400
+++ b/hgext/remotefilelog/remotefilelogserver.py Mon Aug 02 08:05:13 2021 -0400
@@ -166,24 +166,24 @@
n = util.pconvert(fp[striplen:])
d = store.decodedir(n)
t = store.FILETYPE_OTHER
- yield (t, d, n, st.st_size)
+ yield (t, d, st.st_size)
if kind == stat.S_IFDIR:
visit.append(fp)
if scmutil.istreemanifest(repo):
- for (t, u, e, s) in repo.store.datafiles():
+ for (t, u, s) in repo.store.datafiles():
if u.startswith(b'meta/') and (
u.endswith(b'.i') or u.endswith(b'.d')
):
- yield (t, u, e, s)
+ yield (t, u, s)
# Return .d and .i files that do not match the shallow pattern
match = state.match
if match and not match.always():
- for (t, u, e, s) in repo.store.datafiles():
+ for (t, u, s) in repo.store.datafiles():
f = u[5:-2] # trim data/... and .i/.d
if not state.match(f):
- yield (t, u, e, s)
+ yield (t, u, s)
for x in repo.store.topfiles():
if state.noflatmf and x[1][:11] == b'00manifest.':
--- a/mercurial/repair.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/repair.py Mon Aug 02 08:05:13 2021 -0400
@@ -433,7 +433,7 @@
if scmutil.istreemanifest(repo):
# This logic is safe if treemanifest isn't enabled, but also
# pointless, so we skip it if treemanifest isn't enabled.
- for t, unencoded, encoded, size in repo.store.datafiles():
+ for t, unencoded, size in repo.store.datafiles():
if unencoded.startswith(b'meta/') and unencoded.endswith(
b'00manifest.i'
):
--- a/mercurial/revlogutils/rewrite.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/revlogutils/rewrite.py Mon Aug 02 08:05:13 2021 -0400
@@ -824,7 +824,7 @@
with context():
files = list(
(file_type, path)
- for (file_type, path, _e, _s) in repo.store.datafiles()
+ for (file_type, path, _s) in repo.store.datafiles()
if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
)
--- a/mercurial/store.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/store.py Mon Aug 02 08:05:13 2021 -0400
@@ -472,7 +472,7 @@
return self.path + b'/' + encodedir(f)
def _walk(self, relpath, recurse):
- '''yields (unencoded, encoded, size)'''
+ '''yields (revlog_type, unencoded, size)'''
path = self.path
if relpath:
path += b'/' + relpath
@@ -488,7 +488,7 @@
rl_type = is_revlog(f, kind, st)
if rl_type is not None:
n = util.pconvert(fp[striplen:])
- l.append((rl_type, decodedir(n), n, st.st_size))
+ l.append((rl_type, decodedir(n), st.st_size))
elif kind == stat.S_IFDIR and recurse:
visit.append(fp)
l.sort()
@@ -505,26 +505,32 @@
rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
- def datafiles(self, matcher=None):
+ def datafiles(self, matcher=None, undecodable=None):
+ """Like walk, but excluding the changelog and root manifest.
+
+ When [undecodable] is None, revlogs names that can't be
+ decoded cause an exception. When it is provided, it should
+ be a list and the filenames that can't be decoded are added
+ to it instead. This is very rarely needed."""
files = self._walk(b'data', True) + self._walk(b'meta', True)
- for (t, u, e, s) in files:
- yield (FILEFLAGS_FILELOG | t, u, e, s)
+ for (t, u, s) in files:
+ yield (FILEFLAGS_FILELOG | t, u, s)
def topfiles(self):
# yield manifest before changelog
files = reversed(self._walk(b'', False))
- for (t, u, e, s) in files:
+ for (t, u, s) in files:
if u.startswith(b'00changelog'):
- yield (FILEFLAGS_CHANGELOG | t, u, e, s)
+ yield (FILEFLAGS_CHANGELOG | t, u, s)
elif u.startswith(b'00manifest'):
- yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
+ yield (FILEFLAGS_MANIFESTLOG | t, u, s)
else:
- yield (FILETYPE_OTHER | t, u, e, s)
+ yield (FILETYPE_OTHER | t, u, s)
def walk(self, matcher=None):
"""return file related to data storage (ie: revlogs)
- yields (file_type, unencoded, encoded, size)
+ yields (file_type, unencoded, size)
if a matcher is passed, storage files of only those tracked paths
are passed with matches the matcher
@@ -574,15 +580,20 @@
# However that might change so we should probably add a test and encoding
# decoding for it too. see issue6548
- def datafiles(self, matcher=None):
- for t, a, b, size in super(encodedstore, self).datafiles():
+ def datafiles(self, matcher=None, undecodable=None):
+ for t, f1, size in super(encodedstore, self).datafiles():
try:
- a = decodefilename(a)
+ f2 = decodefilename(f1)
except KeyError:
- a = None
- if a is not None and not _matchtrackedpath(a, matcher):
+ if undecodable is None:
+ msg = _(b'undecodable revlog name %s') % f1
+ raise error.StorageError(msg)
+ else:
+ undecodable.append(f1)
+ continue
+ if not _matchtrackedpath(f2, matcher):
continue
- yield t, a, b, size
+ yield t, f2, size
def join(self, f):
return self.path + b'/' + encodefilename(f)
@@ -770,7 +781,7 @@
def getsize(self, path):
return self.rawvfs.stat(path).st_size
- def datafiles(self, matcher=None):
+ def datafiles(self, matcher=None, undecodable=None):
for f in sorted(self.fncache):
if not _matchtrackedpath(f, matcher):
continue
@@ -779,7 +790,7 @@
t = revlog_type(f)
assert t is not None, f
t |= FILEFLAGS_FILELOG
- yield t, f, ef, self.getsize(ef)
+ yield t, f, self.getsize(ef)
except OSError as err:
if err.errno != errno.ENOENT:
raise
--- a/mercurial/streamclone.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/streamclone.py Mon Aug 02 08:05:13 2021 -0400
@@ -248,7 +248,7 @@
# Get consistent snapshot of repo, lock during scan.
with repo.lock():
repo.ui.debug(b'scanning\n')
- for file_type, name, ename, size in _walkstreamfiles(repo):
+ for file_type, name, size in _walkstreamfiles(repo):
if size:
entries.append((name, size))
total_bytes += size
@@ -650,7 +650,7 @@
if includes or excludes:
matcher = narrowspec.match(repo.root, includes, excludes)
- for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
+ for rl_type, name, size in _walkstreamfiles(repo, matcher):
if size:
ft = _fileappend
if rl_type & store.FILEFLAGS_VOLATILE:
--- a/mercurial/upgrade_utils/engine.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/upgrade_utils/engine.py Mon Aug 02 08:05:13 2021 -0400
@@ -201,7 +201,7 @@
# Perform a pass to collect metadata. This validates we can open all
# source files and allows a unified progress bar to be displayed.
- for rl_type, unencoded, encoded, size in alldatafiles:
+ for rl_type, unencoded, size in alldatafiles:
if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
continue
--- a/mercurial/verify.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/verify.py Mon Aug 02 08:05:13 2021 -0400
@@ -395,12 +395,13 @@
storefiles = set()
subdirs = set()
revlogv1 = self.revlogv1
- for t, f, f2, size in repo.store.datafiles():
- if not f:
- self._err(None, _(b"cannot decode filename '%s'") % f2)
- elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
+ undecodable = []
+ for t, f, size in repo.store.datafiles(undecodable=undecodable):
+ if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
storefiles.add(_normpath(f))
subdirs.add(os.path.dirname(f))
+ for f in undecodable:
+ self._err(None, _(b"cannot decode filename '%s'") % f)
subdirprogress = ui.makeprogress(
_(b'checking'), unit=_(b'manifests'), total=len(subdirs)
)
@@ -459,11 +460,12 @@
ui.status(_(b"checking files\n"))
storefiles = set()
- for rl_type, f, f2, size in repo.store.datafiles():
- if not f:
- self._err(None, _(b"cannot decode filename '%s'") % f2)
- elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
+ undecodable = []
+ for t, f, size in repo.store.datafiles(undecodable=undecodable):
+ if (size > 0 or not revlogv1) and f.startswith(b'data/'):
storefiles.add(_normpath(f))
+ for f in undecodable:
+ self._err(None, _(b"cannot decode filename '%s'") % f)
state = {
# TODO this assumes revlog storage for changelog.
--- a/mercurial/wireprotov2server.py Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/wireprotov2server.py Mon Aug 02 08:05:13 2021 -0400
@@ -1579,7 +1579,7 @@
# TODO this is a bunch of storage layer interface abstractions because
# it assumes revlogs.
- for rl_type, name, encodedname, size in topfiles:
+ for rl_type, name, size in topfiles:
# XXX use the `rl_type` for that
if b'changelog' in files and name.startswith(b'00changelog'):
pass
--- a/tests/simplestorerepo.py Sun Aug 01 10:57:21 2021 -0400
+++ b/tests/simplestorerepo.py Mon Aug 02 08:05:13 2021 -0400
@@ -665,20 +665,24 @@
class simplestore(store.encodedstore):
- def datafiles(self):
+ def datafiles(self, undecodable=None):
for x in super(simplestore, self).datafiles():
yield x
# Supplement with non-revlog files.
extrafiles = self._walk('data', True, filefilter=issimplestorefile)
- for unencoded, encoded, size in extrafiles:
+ for f1, size in extrafiles:
try:
- unencoded = store.decodefilename(unencoded)
+ f2 = store.decodefilename(f1)
except KeyError:
- unencoded = None
+ if undecodable is None:
+ raise error.StorageError(b'undecodable revlog name %s' % f1)
+ else:
+ undecodable.append(f1)
+ continue
- yield unencoded, encoded, size
+ yield f2, size
def reposetup(ui, repo):