store: also return some information about the type of file `walk` found
We start returning a fourth piece of information in the `store.walk` return tuple: the type of the file. This will make it easier for callers to determine which kind of file they are looking at. This should especially help the `upgrade-repo` code, which has to do a lot of fragile index file name comparisons.
Differential Revision: https://phab.mercurial-scm.org/D10315
--- a/hgext/largefiles/lfutil.py Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/largefiles/lfutil.py Tue Apr 06 10:38:03 2021 +0200
@@ -514,7 +514,7 @@
def islfilesrepo(repo):
'''Return true if the repo is a largefile repo.'''
if b'largefiles' in repo.requirements and any(
- shortnameslash in f[0] for f in repo.store.datafiles()
+ shortnameslash in f[1] for f in repo.store.datafiles()
):
return True
--- a/hgext/largefiles/reposetup.py Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/largefiles/reposetup.py Tue Apr 06 10:38:03 2021 +0200
@@ -445,7 +445,7 @@
def checkrequireslfiles(ui, repo, **kwargs):
if b'largefiles' not in repo.requirements and any(
- lfutil.shortname + b'/' in f[0] for f in repo.store.datafiles()
+ lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
):
repo.requirements.add(b'largefiles')
scmutil.writereporequirements(repo)
--- a/hgext/narrow/narrowcommands.py Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/narrow/narrowcommands.py Tue Apr 06 10:38:03 2021 +0200
@@ -276,7 +276,7 @@
repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
todelete = []
- for f, f2, size in repo.store.datafiles():
+ for t, f, f2, size in repo.store.datafiles():
if f.startswith(b'data/'):
file = f[5:-2]
if not newmatch(file):
--- a/hgext/remotefilelog/contentstore.py Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/remotefilelog/contentstore.py Tue Apr 06 10:38:03 2021 +0200
@@ -365,7 +365,7 @@
ledger.markdataentry(self, treename, node)
ledger.markhistoryentry(self, treename, node)
- for path, encoded, size in self._store.datafiles():
+ for t, path, encoded, size in self._store.datafiles():
if path[:5] != b'meta/' or path[-2:] != b'.i':
continue
--- a/hgext/remotefilelog/remotefilelogserver.py Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/remotefilelog/remotefilelogserver.py Tue Apr 06 10:38:03 2021 +0200
@@ -164,24 +164,26 @@
b'.d'
):
n = util.pconvert(fp[striplen:])
- yield (store.decodedir(n), n, st.st_size)
+ d = store.decodedir(n)
+ t = store.FILETYPE_OTHER
+ yield (t, d, n, st.st_size)
if kind == stat.S_IFDIR:
visit.append(fp)
if scmutil.istreemanifest(repo):
- for (u, e, s) in repo.store.datafiles():
+ for (t, u, e, s) in repo.store.datafiles():
if u.startswith(b'meta/') and (
u.endswith(b'.i') or u.endswith(b'.d')
):
- yield (u, e, s)
+ yield (t, u, e, s)
# Return .d and .i files that do not match the shallow pattern
match = state.match
if match and not match.always():
- for (u, e, s) in repo.store.datafiles():
+ for (t, u, e, s) in repo.store.datafiles():
f = u[5:-2] # trim data/... and .i/.d
if not state.match(f):
- yield (u, e, s)
+ yield (t, u, e, s)
for x in repo.store.topfiles():
if state.noflatmf and x[0][:11] == b'00manifest.':
--- a/mercurial/repair.py Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/repair.py Tue Apr 06 10:38:03 2021 +0200
@@ -428,7 +428,7 @@
if scmutil.istreemanifest(repo):
# This logic is safe if treemanifest isn't enabled, but also
# pointless, so we skip it if treemanifest isn't enabled.
- for unencoded, encoded, size in repo.store.datafiles():
+ for t, unencoded, encoded, size in repo.store.datafiles():
if unencoded.startswith(b'meta/') and unencoded.endswith(
b'00manifest.i'
):
--- a/mercurial/store.py Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/store.py Tue Apr 06 10:38:03 2021 +0200
@@ -387,13 +387,44 @@
b'requires',
]
-REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd')
+REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
+REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored')
+
+
+def is_revlog(f, kind, st):
+    """Return the revlog type flags for directory entry `f`, or None.
+
+    Only regular files (`kind == stat.S_IFREG`) are classified, by
+    delegating to `revlog_type`; every other kind of entry yields None.
+    `st` is accepted but not used here.
+    """
+    if kind == stat.S_IFREG:
+        return revlog_type(f)
+    return None
+
+
+def revlog_type(f):
+    """Classify file name `f` by its revlog extension.
+
+    Returns FILEFLAGS_REVLOG_MAIN when `f` ends with one of
+    REVLOG_FILES_MAIN_EXT, FILETYPE_FILELOG_OTHER when it ends with one of
+    REVLOG_FILES_OTHER_EXT, and None for any other name.
+    """
+    # NOTE(review): the secondary-file branch returns the composite
+    # FILETYPE_FILELOG_OTHER (FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER),
+    # not the bare FILEFLAGS_REVLOG_OTHER flag, so callers that OR in a
+    # changelog or manifestlog flag also end up with the filelog bit set.
+    # Confirm this is intended before relying on the category bits.
+    if f.endswith(REVLOG_FILES_MAIN_EXT):
+        return FILEFLAGS_REVLOG_MAIN
+    if f.endswith(REVLOG_FILES_OTHER_EXT):
+        return FILETYPE_FILELOG_OTHER
+    return None
-def isrevlog(f, kind, st):
- if kind != stat.S_IFREG:
- return False
- return f.endswith(REVLOG_FILES_EXT)
+# File type flags carried as the first element of the tuples yielded by
+# `datafiles` and `topfiles`.
+#
+# Bits 10-13 say which kind of data a file belongs to; bits 0-1 say which
+# role the file plays within a revlog. A FILETYPE_* value is one flag of
+# each group OR-ed together (FILETYPE_OTHER has no revlog role bit).
+#
+# the file is part of changelog data
+FILEFLAGS_CHANGELOG = 1 << 13
+# the file is part of manifest data
+FILEFLAGS_MANIFESTLOG = 1 << 12
+# the file is part of filelog data
+FILEFLAGS_FILELOG = 1 << 11
+# files that are not directly part of a revlog
+FILEFLAGS_OTHER = 1 << 10
+
+# the main entry point for a revlog
+FILEFLAGS_REVLOG_MAIN = 1 << 1
+# a secondary file for a revlog
+FILEFLAGS_REVLOG_OTHER = 1 << 0
+
+FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_OTHER = FILEFLAGS_OTHER
class basicstore(object):
@@ -425,9 +456,10 @@
p = visit.pop()
for f, kind, st in readdir(p, stat=True):
fp = p + b'/' + f
- if isrevlog(f, kind, st):
+ rl_type = is_revlog(f, kind, st)
+ if rl_type is not None:
n = util.pconvert(fp[striplen:])
- l.append((decodedir(n), n, st.st_size))
+ l.append((rl_type, decodedir(n), n, st.st_size))
elif kind == stat.S_IFDIR and recurse:
visit.append(fp)
l.sort()
@@ -445,16 +477,25 @@
return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
def datafiles(self, matcher=None):
- return self._walk(b'data', True) + self._walk(b'meta', True)
+ # Yields (file_type, unencoded, encoded, size) for every entry that
+ # `_walk` reports under `data` and `meta`. This is now a generator
+ # rather than a list; `matcher` is not used in this implementation.
+ files = self._walk(b'data', True) + self._walk(b'meta', True)
+ for (t, u, e, s) in files:
+ # NOTE(review): entries found under `meta` also get
+ # FILEFLAGS_FILELOG here -- confirm that is the intended category.
+ yield (FILEFLAGS_FILELOG | t, u, e, s)
def topfiles(self):
# yield manifest before changelog
- return reversed(self._walk(b'', False))
+ # Walk the store root without recursing, in reverse of the order
+ # `_walk` returns.
+ files = reversed(self._walk(b'', False))
+ for (t, u, e, s) in files:
+ # Add a category flag chosen from the unencoded name prefix to the
+ # type `_walk` reported for the entry.
+ if u.startswith(b'00changelog'):
+ yield (FILEFLAGS_CHANGELOG | t, u, e, s)
+ elif u.startswith(b'00manifest'):
+ yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
+ else:
+ yield (FILETYPE_OTHER | t, u, e, s)
def walk(self, matcher=None):
"""return file related to data storage (ie: revlogs)
- yields (unencoded, encoded, size)
+ yields (file_type, unencoded, encoded, size)
if a matcher is passed, storage files of only those tracked paths
are passed with matches the matcher
@@ -500,14 +541,14 @@
self.opener = self.vfs
def datafiles(self, matcher=None):
- for a, b, size in super(encodedstore, self).datafiles():
+ # Same entries as the parent store's `datafiles`, with the unencoded
+ # name decoded and entries rejected by `_matchtrackedpath` dropped.
+ for t, a, b, size in super(encodedstore, self).datafiles():
try:
a = decodefilename(a)
except KeyError:
a = None
if a is not None and not _matchtrackedpath(a, matcher):
continue
- yield a, b, size
+ # Names that failed to decode are still yielded, with `a` set to
+ # None; the file type `t` is forwarded unchanged from the parent.
+ yield t, a, b, size
def join(self, f):
return self.path + b'/' + encodefilename(f)
@@ -696,7 +737,9 @@
continue
ef = self.encode(f)
try:
- yield f, ef, self.getsize(ef)
+ t = revlog_type(f)
+ t |= FILEFLAGS_FILELOG
+ yield t, f, ef, self.getsize(ef)
except OSError as err:
if err.errno != errno.ENOENT:
raise
--- a/mercurial/streamclone.py Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/streamclone.py Tue Apr 06 10:38:03 2021 +0200
@@ -243,7 +243,7 @@
# Get consistent snapshot of repo, lock during scan.
with repo.lock():
repo.ui.debug(b'scanning\n')
- for name, ename, size in _walkstreamfiles(repo):
+ for file_type, name, ename, size in _walkstreamfiles(repo):
if size:
entries.append((name, size))
total_bytes += size
@@ -616,7 +616,7 @@
matcher = narrowspec.match(repo.root, includes, excludes)
repo.ui.debug(b'scanning\n')
- for name, ename, size in _walkstreamfiles(repo, matcher):
+ for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
if size:
entries.append((_srcstore, name, _fileappend, size))
totalfilesize += size
--- a/mercurial/upgrade_utils/engine.py Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/upgrade_utils/engine.py Tue Apr 06 10:38:03 2021 +0200
@@ -192,7 +192,7 @@
# Perform a pass to collect metadata. This validates we can open all
# source files and allows a unified progress bar to be displayed.
- for unencoded, encoded, size in alldatafiles:
+ for revlog_type, unencoded, encoded, size in alldatafiles:
if not unencoded.endswith(b'.i'):
continue
--- a/mercurial/verify.py Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/verify.py Tue Apr 06 10:38:03 2021 +0200
@@ -416,7 +416,7 @@
storefiles = set()
subdirs = set()
revlogv1 = self.revlogv1
- for f, f2, size in repo.store.datafiles():
+ for t, f, f2, size in repo.store.datafiles():
if not f:
self._err(None, _(b"cannot decode filename '%s'") % f2)
elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
@@ -480,7 +480,7 @@
ui.status(_(b"checking files\n"))
storefiles = set()
- for f, f2, size in repo.store.datafiles():
+ for rl_type, f, f2, size in repo.store.datafiles():
if not f:
self._err(None, _(b"cannot decode filename '%s'") % f2)
elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
--- a/mercurial/wireprotov2server.py Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/wireprotov2server.py Tue Apr 06 10:38:03 2021 +0200
@@ -1582,7 +1582,8 @@
# TODO this is a bunch of storage layer interface abstractions because
# it assumes revlogs.
- for name, encodedname, size in topfiles:
+ for rl_type, name, encodedname, size in topfiles:
+ # XXX use the `rl_type` for that
if b'changelog' in files and name.startswith(b'00changelog'):
pass
elif b'manifestlog' in files and name.startswith(b'00manifest'):
--- a/tests/test-persistent-nodemap.t Mon Apr 05 23:54:54 2021 -0400
+++ b/tests/test-persistent-nodemap.t Tue Apr 06 10:38:03 2021 +0200
@@ -754,15 +754,15 @@
$ hg clone -U --stream --config ui.ssh="\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/test-repo stream-clone --debug | egrep '00(changelog|manifest)'
adding [s] 00manifest.n (70 bytes)
- adding [s] 00manifest.i (313 KB)
adding [s] 00manifest.d (452 KB) (no-zstd !)
adding [s] 00manifest.d (491 KB) (zstd !)
adding [s] 00manifest-*.nd (118 KB) (glob)
adding [s] 00changelog.n (70 bytes)
- adding [s] 00changelog.i (313 KB)
adding [s] 00changelog.d (360 KB) (no-zstd !)
adding [s] 00changelog.d (368 KB) (zstd !)
adding [s] 00changelog-*.nd (118 KB) (glob)
+ adding [s] 00manifest.i (313 KB)
+ adding [s] 00changelog.i (313 KB)
$ ls -1 stream-clone/.hg/store/ | egrep '00(changelog|manifest)(\.n|-.*\.nd)'
00changelog-*.nd (glob)
00changelog.n