store: also return some information about the type of file `walk` found
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 06 Apr 2021 10:38:03 +0200
changeset 46895 6085b7f1536d
parent 46894 fe34c75f62ab
child 46896 cf49e54ef965
store: also return some information about the type of file `walk` found We start returning a 4th piece of information in the `store.walk` return tuple: the type of the file. This will make it easier for callers to determine which kind of file they are looking at. This should especially help with the `upgrade-repo` code, which has to do a lot of fragile index file name comparisons. Differential Revision: https://phab.mercurial-scm.org/D10315
hgext/largefiles/lfutil.py
hgext/largefiles/reposetup.py
hgext/narrow/narrowcommands.py
hgext/remotefilelog/contentstore.py
hgext/remotefilelog/remotefilelogserver.py
mercurial/repair.py
mercurial/store.py
mercurial/streamclone.py
mercurial/upgrade_utils/engine.py
mercurial/verify.py
mercurial/wireprotov2server.py
tests/test-persistent-nodemap.t
--- a/hgext/largefiles/lfutil.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/largefiles/lfutil.py	Tue Apr 06 10:38:03 2021 +0200
@@ -514,7 +514,7 @@
 def islfilesrepo(repo):
     '''Return true if the repo is a largefile repo.'''
     if b'largefiles' in repo.requirements and any(
-        shortnameslash in f[0] for f in repo.store.datafiles()
+        shortnameslash in f[1] for f in repo.store.datafiles()
     ):
         return True
 
--- a/hgext/largefiles/reposetup.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/largefiles/reposetup.py	Tue Apr 06 10:38:03 2021 +0200
@@ -445,7 +445,7 @@
 
     def checkrequireslfiles(ui, repo, **kwargs):
         if b'largefiles' not in repo.requirements and any(
-            lfutil.shortname + b'/' in f[0] for f in repo.store.datafiles()
+            lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
         ):
             repo.requirements.add(b'largefiles')
             scmutil.writereporequirements(repo)
--- a/hgext/narrow/narrowcommands.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/narrow/narrowcommands.py	Tue Apr 06 10:38:03 2021 +0200
@@ -276,7 +276,7 @@
                 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
 
         todelete = []
-        for f, f2, size in repo.store.datafiles():
+        for t, f, f2, size in repo.store.datafiles():
             if f.startswith(b'data/'):
                 file = f[5:-2]
                 if not newmatch(file):
--- a/hgext/remotefilelog/contentstore.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/remotefilelog/contentstore.py	Tue Apr 06 10:38:03 2021 +0200
@@ -365,7 +365,7 @@
             ledger.markdataentry(self, treename, node)
             ledger.markhistoryentry(self, treename, node)
 
-        for path, encoded, size in self._store.datafiles():
+        for t, path, encoded, size in self._store.datafiles():
             if path[:5] != b'meta/' or path[-2:] != b'.i':
                 continue
 
--- a/hgext/remotefilelog/remotefilelogserver.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/hgext/remotefilelog/remotefilelogserver.py	Tue Apr 06 10:38:03 2021 +0200
@@ -164,24 +164,26 @@
                                 b'.d'
                             ):
                                 n = util.pconvert(fp[striplen:])
-                                yield (store.decodedir(n), n, st.st_size)
+                                d = store.decodedir(n)
+                                t = store.FILETYPE_OTHER
+                                yield (t, d, n, st.st_size)
                         if kind == stat.S_IFDIR:
                             visit.append(fp)
 
             if scmutil.istreemanifest(repo):
-                for (u, e, s) in repo.store.datafiles():
+                for (t, u, e, s) in repo.store.datafiles():
                     if u.startswith(b'meta/') and (
                         u.endswith(b'.i') or u.endswith(b'.d')
                     ):
-                        yield (u, e, s)
+                        yield (t, u, e, s)
 
             # Return .d and .i files that do not match the shallow pattern
             match = state.match
             if match and not match.always():
-                for (u, e, s) in repo.store.datafiles():
+                for (t, u, e, s) in repo.store.datafiles():
                     f = u[5:-2]  # trim data/...  and .i/.d
                     if not state.match(f):
-                        yield (u, e, s)
+                        yield (t, u, e, s)
 
             for x in repo.store.topfiles():
                 if state.noflatmf and x[0][:11] == b'00manifest.':
--- a/mercurial/repair.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/repair.py	Tue Apr 06 10:38:03 2021 +0200
@@ -428,7 +428,7 @@
     if scmutil.istreemanifest(repo):
         # This logic is safe if treemanifest isn't enabled, but also
         # pointless, so we skip it if treemanifest isn't enabled.
-        for unencoded, encoded, size in repo.store.datafiles():
+        for t, unencoded, encoded, size in repo.store.datafiles():
             if unencoded.startswith(b'meta/') and unencoded.endswith(
                 b'00manifest.i'
             ):
--- a/mercurial/store.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/store.py	Tue Apr 06 10:38:03 2021 +0200
@@ -387,13 +387,44 @@
     b'requires',
 ]
 
-REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd')
+REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
+REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored')
+
+
+def is_revlog(f, kind, st):
+    if kind != stat.S_IFREG:
+        return None
+    return revlog_type(f)
+
+
+def revlog_type(f):
+    if f.endswith(REVLOG_FILES_MAIN_EXT):
+        return FILEFLAGS_REVLOG_MAIN
+    elif f.endswith(REVLOG_FILES_OTHER_EXT):
+        return FILETYPE_FILELOG_OTHER
 
 
-def isrevlog(f, kind, st):
-    if kind != stat.S_IFREG:
-        return False
-    return f.endswith(REVLOG_FILES_EXT)
+# the file is part of changelog data
+FILEFLAGS_CHANGELOG = 1 << 13
+# the file is part of manifest data
+FILEFLAGS_MANIFESTLOG = 1 << 12
+# the file is part of filelog data
+FILEFLAGS_FILELOG = 1 << 11
+# file that are not directly part of a revlog
+FILEFLAGS_OTHER = 1 << 10
+
+# the main entry point for a revlog
+FILEFLAGS_REVLOG_MAIN = 1 << 1
+# a secondary file for a revlog
+FILEFLAGS_REVLOG_OTHER = 1 << 0
+
+FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
+FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
+FILETYPE_OTHER = FILEFLAGS_OTHER
 
 
 class basicstore(object):
@@ -425,9 +456,10 @@
                 p = visit.pop()
                 for f, kind, st in readdir(p, stat=True):
                     fp = p + b'/' + f
-                    if isrevlog(f, kind, st):
+                    rl_type = is_revlog(f, kind, st)
+                    if rl_type is not None:
                         n = util.pconvert(fp[striplen:])
-                        l.append((decodedir(n), n, st.st_size))
+                        l.append((rl_type, decodedir(n), n, st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         l.sort()
@@ -445,16 +477,25 @@
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
 
     def datafiles(self, matcher=None):
-        return self._walk(b'data', True) + self._walk(b'meta', True)
+        files = self._walk(b'data', True) + self._walk(b'meta', True)
+        for (t, u, e, s) in files:
+            yield (FILEFLAGS_FILELOG | t, u, e, s)
 
     def topfiles(self):
         # yield manifest before changelog
-        return reversed(self._walk(b'', False))
+        files = reversed(self._walk(b'', False))
+        for (t, u, e, s) in files:
+            if u.startswith(b'00changelog'):
+                yield (FILEFLAGS_CHANGELOG | t, u, e, s)
+            elif u.startswith(b'00manifest'):
+                yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
+            else:
+                yield (FILETYPE_OTHER | t, u, e, s)
 
     def walk(self, matcher=None):
         """return file related to data storage (ie: revlogs)
 
-        yields (unencoded, encoded, size)
+        yields (file_type, unencoded, encoded, size)
 
         if a matcher is passed, storage files of only those tracked paths
         are passed with matches the matcher
@@ -500,14 +541,14 @@
         self.opener = self.vfs
 
     def datafiles(self, matcher=None):
-        for a, b, size in super(encodedstore, self).datafiles():
+        for t, a, b, size in super(encodedstore, self).datafiles():
             try:
                 a = decodefilename(a)
             except KeyError:
                 a = None
             if a is not None and not _matchtrackedpath(a, matcher):
                 continue
-            yield a, b, size
+            yield t, a, b, size
 
     def join(self, f):
         return self.path + b'/' + encodefilename(f)
@@ -696,7 +737,9 @@
                 continue
             ef = self.encode(f)
             try:
-                yield f, ef, self.getsize(ef)
+                t = revlog_type(f)
+                t |= FILEFLAGS_FILELOG
+                yield t, f, ef, self.getsize(ef)
             except OSError as err:
                 if err.errno != errno.ENOENT:
                     raise
--- a/mercurial/streamclone.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/streamclone.py	Tue Apr 06 10:38:03 2021 +0200
@@ -243,7 +243,7 @@
     # Get consistent snapshot of repo, lock during scan.
     with repo.lock():
         repo.ui.debug(b'scanning\n')
-        for name, ename, size in _walkstreamfiles(repo):
+        for file_type, name, ename, size in _walkstreamfiles(repo):
             if size:
                 entries.append((name, size))
                 total_bytes += size
@@ -616,7 +616,7 @@
             matcher = narrowspec.match(repo.root, includes, excludes)
 
         repo.ui.debug(b'scanning\n')
-        for name, ename, size in _walkstreamfiles(repo, matcher):
+        for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
             if size:
                 entries.append((_srcstore, name, _fileappend, size))
                 totalfilesize += size
--- a/mercurial/upgrade_utils/engine.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/upgrade_utils/engine.py	Tue Apr 06 10:38:03 2021 +0200
@@ -192,7 +192,7 @@
 
     # Perform a pass to collect metadata. This validates we can open all
     # source files and allows a unified progress bar to be displayed.
-    for unencoded, encoded, size in alldatafiles:
+    for revlog_type, unencoded, encoded, size in alldatafiles:
         if not unencoded.endswith(b'.i'):
             continue
 
--- a/mercurial/verify.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/verify.py	Tue Apr 06 10:38:03 2021 +0200
@@ -416,7 +416,7 @@
             storefiles = set()
             subdirs = set()
             revlogv1 = self.revlogv1
-            for f, f2, size in repo.store.datafiles():
+            for t, f, f2, size in repo.store.datafiles():
                 if not f:
                     self._err(None, _(b"cannot decode filename '%s'") % f2)
                 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
@@ -480,7 +480,7 @@
         ui.status(_(b"checking files\n"))
 
         storefiles = set()
-        for f, f2, size in repo.store.datafiles():
+        for rl_type, f, f2, size in repo.store.datafiles():
             if not f:
                 self._err(None, _(b"cannot decode filename '%s'") % f2)
             elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
--- a/mercurial/wireprotov2server.py	Mon Apr 05 23:54:54 2021 -0400
+++ b/mercurial/wireprotov2server.py	Tue Apr 06 10:38:03 2021 +0200
@@ -1582,7 +1582,8 @@
 
     # TODO this is a bunch of storage layer interface abstractions because
     # it assumes revlogs.
-    for name, encodedname, size in topfiles:
+    for rl_type, name, encodedname, size in topfiles:
+        # XXX use the `rl_type` for that
         if b'changelog' in files and name.startswith(b'00changelog'):
             pass
         elif b'manifestlog' in files and name.startswith(b'00manifest'):
--- a/tests/test-persistent-nodemap.t	Mon Apr 05 23:54:54 2021 -0400
+++ b/tests/test-persistent-nodemap.t	Tue Apr 06 10:38:03 2021 +0200
@@ -754,15 +754,15 @@
 
   $ hg clone -U --stream --config ui.ssh="\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/test-repo stream-clone --debug | egrep '00(changelog|manifest)'
   adding [s] 00manifest.n (70 bytes)
-  adding [s] 00manifest.i (313 KB)
   adding [s] 00manifest.d (452 KB) (no-zstd !)
   adding [s] 00manifest.d (491 KB) (zstd !)
   adding [s] 00manifest-*.nd (118 KB) (glob)
   adding [s] 00changelog.n (70 bytes)
-  adding [s] 00changelog.i (313 KB)
   adding [s] 00changelog.d (360 KB) (no-zstd !)
   adding [s] 00changelog.d (368 KB) (zstd !)
   adding [s] 00changelog-*.nd (118 KB) (glob)
+  adding [s] 00manifest.i (313 KB)
+  adding [s] 00changelog.i (313 KB)
   $ ls -1 stream-clone/.hg/store/ | egrep '00(changelog|manifest)(\.n|-.*\.nd)'
   00changelog-*.nd (glob)
   00changelog.n