Mercurial > hg
changeset 46895:6085b7f1536d
store: also return some information about the type of file `walk` found
We start returning a fourth piece of information in the `store.walk` return tuple: the type of the file. This will make it easier for callers to determine which kind of file they are looking at. This should especially help the `upgrade-repo` code, which has to do a lot of fragile index file name comparisons.
Differential Revision: https://phab.mercurial-scm.org/D10315
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 06 Apr 2021 10:38:03 +0200 |
parents | fe34c75f62ab |
children | cf49e54ef965 |
files | hgext/largefiles/lfutil.py hgext/largefiles/reposetup.py hgext/narrow/narrowcommands.py hgext/remotefilelog/contentstore.py hgext/remotefilelog/remotefilelogserver.py mercurial/repair.py mercurial/store.py mercurial/streamclone.py mercurial/upgrade_utils/engine.py mercurial/verify.py mercurial/wireprotov2server.py tests/test-persistent-nodemap.t |
diffstat | 12 files changed, 77 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/largefiles/lfutil.py Mon Apr 05 23:54:54 2021 -0400 +++ b/hgext/largefiles/lfutil.py Tue Apr 06 10:38:03 2021 +0200 @@ -514,7 +514,7 @@ def islfilesrepo(repo): '''Return true if the repo is a largefile repo.''' if b'largefiles' in repo.requirements and any( - shortnameslash in f[0] for f in repo.store.datafiles() + shortnameslash in f[1] for f in repo.store.datafiles() ): return True
--- a/hgext/largefiles/reposetup.py Mon Apr 05 23:54:54 2021 -0400 +++ b/hgext/largefiles/reposetup.py Tue Apr 06 10:38:03 2021 +0200 @@ -445,7 +445,7 @@ def checkrequireslfiles(ui, repo, **kwargs): if b'largefiles' not in repo.requirements and any( - lfutil.shortname + b'/' in f[0] for f in repo.store.datafiles() + lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles() ): repo.requirements.add(b'largefiles') scmutil.writereporequirements(repo)
--- a/hgext/narrow/narrowcommands.py Mon Apr 05 23:54:54 2021 -0400 +++ b/hgext/narrow/narrowcommands.py Tue Apr 06 10:38:03 2021 +0200 @@ -276,7 +276,7 @@ repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup) todelete = [] - for f, f2, size in repo.store.datafiles(): + for t, f, f2, size in repo.store.datafiles(): if f.startswith(b'data/'): file = f[5:-2] if not newmatch(file):
--- a/hgext/remotefilelog/contentstore.py Mon Apr 05 23:54:54 2021 -0400 +++ b/hgext/remotefilelog/contentstore.py Tue Apr 06 10:38:03 2021 +0200 @@ -365,7 +365,7 @@ ledger.markdataentry(self, treename, node) ledger.markhistoryentry(self, treename, node) - for path, encoded, size in self._store.datafiles(): + for t, path, encoded, size in self._store.datafiles(): if path[:5] != b'meta/' or path[-2:] != b'.i': continue
--- a/hgext/remotefilelog/remotefilelogserver.py Mon Apr 05 23:54:54 2021 -0400 +++ b/hgext/remotefilelog/remotefilelogserver.py Tue Apr 06 10:38:03 2021 +0200 @@ -164,24 +164,26 @@ b'.d' ): n = util.pconvert(fp[striplen:]) - yield (store.decodedir(n), n, st.st_size) + d = store.decodedir(n) + t = store.FILETYPE_OTHER + yield (t, d, n, st.st_size) if kind == stat.S_IFDIR: visit.append(fp) if scmutil.istreemanifest(repo): - for (u, e, s) in repo.store.datafiles(): + for (t, u, e, s) in repo.store.datafiles(): if u.startswith(b'meta/') and ( u.endswith(b'.i') or u.endswith(b'.d') ): - yield (u, e, s) + yield (t, u, e, s) # Return .d and .i files that do not match the shallow pattern match = state.match if match and not match.always(): - for (u, e, s) in repo.store.datafiles(): + for (t, u, e, s) in repo.store.datafiles(): f = u[5:-2] # trim data/... and .i/.d if not state.match(f): - yield (u, e, s) + yield (t, u, e, s) for x in repo.store.topfiles(): if state.noflatmf and x[0][:11] == b'00manifest.':
--- a/mercurial/repair.py Mon Apr 05 23:54:54 2021 -0400 +++ b/mercurial/repair.py Tue Apr 06 10:38:03 2021 +0200 @@ -428,7 +428,7 @@ if scmutil.istreemanifest(repo): # This logic is safe if treemanifest isn't enabled, but also # pointless, so we skip it if treemanifest isn't enabled. - for unencoded, encoded, size in repo.store.datafiles(): + for t, unencoded, encoded, size in repo.store.datafiles(): if unencoded.startswith(b'meta/') and unencoded.endswith( b'00manifest.i' ):
--- a/mercurial/store.py Mon Apr 05 23:54:54 2021 -0400 +++ b/mercurial/store.py Tue Apr 06 10:38:03 2021 +0200 @@ -387,13 +387,44 @@ b'requires', ] -REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd') +REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored') +REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored') + + +def is_revlog(f, kind, st): + if kind != stat.S_IFREG: + return None + return revlog_type(f) + + +def revlog_type(f): + if f.endswith(REVLOG_FILES_MAIN_EXT): + return FILEFLAGS_REVLOG_MAIN + elif f.endswith(REVLOG_FILES_OTHER_EXT): + return FILETYPE_FILELOG_OTHER -def isrevlog(f, kind, st): - if kind != stat.S_IFREG: - return False - return f.endswith(REVLOG_FILES_EXT) +# the file is part of changelog data +FILEFLAGS_CHANGELOG = 1 << 13 +# the file is part of manifest data +FILEFLAGS_MANIFESTLOG = 1 << 12 +# the file is part of filelog data +FILEFLAGS_FILELOG = 1 << 11 +# file that are not directly part of a revlog +FILEFLAGS_OTHER = 1 << 10 + +# the main entry point for a revlog +FILEFLAGS_REVLOG_MAIN = 1 << 1 +# a secondary file for a revlog +FILEFLAGS_REVLOG_OTHER = 1 << 0 + +FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN +FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER +FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN +FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER +FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN +FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER +FILETYPE_OTHER = FILEFLAGS_OTHER class basicstore(object): @@ -425,9 +456,10 @@ p = visit.pop() for f, kind, st in readdir(p, stat=True): fp = p + b'/' + f - if isrevlog(f, kind, st): + rl_type = is_revlog(f, kind, st) + if rl_type is not None: n = util.pconvert(fp[striplen:]) - l.append((decodedir(n), n, st.st_size)) + l.append((rl_type, decodedir(n), n, st.st_size)) elif kind == stat.S_IFDIR and recurse: visit.append(fp) l.sort() @@ -445,16 +477,25 
@@ return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch) def datafiles(self, matcher=None): - return self._walk(b'data', True) + self._walk(b'meta', True) + files = self._walk(b'data', True) + self._walk(b'meta', True) + for (t, u, e, s) in files: + yield (FILEFLAGS_FILELOG | t, u, e, s) def topfiles(self): # yield manifest before changelog - return reversed(self._walk(b'', False)) + files = reversed(self._walk(b'', False)) + for (t, u, e, s) in files: + if u.startswith(b'00changelog'): + yield (FILEFLAGS_CHANGELOG | t, u, e, s) + elif u.startswith(b'00manifest'): + yield (FILEFLAGS_MANIFESTLOG | t, u, e, s) + else: + yield (FILETYPE_OTHER | t, u, e, s) def walk(self, matcher=None): """return file related to data storage (ie: revlogs) - yields (unencoded, encoded, size) + yields (file_type, unencoded, encoded, size) if a matcher is passed, storage files of only those tracked paths are passed with matches the matcher @@ -500,14 +541,14 @@ self.opener = self.vfs def datafiles(self, matcher=None): - for a, b, size in super(encodedstore, self).datafiles(): + for t, a, b, size in super(encodedstore, self).datafiles(): try: a = decodefilename(a) except KeyError: a = None if a is not None and not _matchtrackedpath(a, matcher): continue - yield a, b, size + yield t, a, b, size def join(self, f): return self.path + b'/' + encodefilename(f) @@ -696,7 +737,9 @@ continue ef = self.encode(f) try: - yield f, ef, self.getsize(ef) + t = revlog_type(f) + t |= FILEFLAGS_FILELOG + yield t, f, ef, self.getsize(ef) except OSError as err: if err.errno != errno.ENOENT: raise
--- a/mercurial/streamclone.py Mon Apr 05 23:54:54 2021 -0400 +++ b/mercurial/streamclone.py Tue Apr 06 10:38:03 2021 +0200 @@ -243,7 +243,7 @@ # Get consistent snapshot of repo, lock during scan. with repo.lock(): repo.ui.debug(b'scanning\n') - for name, ename, size in _walkstreamfiles(repo): + for file_type, name, ename, size in _walkstreamfiles(repo): if size: entries.append((name, size)) total_bytes += size @@ -616,7 +616,7 @@ matcher = narrowspec.match(repo.root, includes, excludes) repo.ui.debug(b'scanning\n') - for name, ename, size in _walkstreamfiles(repo, matcher): + for rl_type, name, ename, size in _walkstreamfiles(repo, matcher): if size: entries.append((_srcstore, name, _fileappend, size)) totalfilesize += size
--- a/mercurial/upgrade_utils/engine.py Mon Apr 05 23:54:54 2021 -0400 +++ b/mercurial/upgrade_utils/engine.py Tue Apr 06 10:38:03 2021 +0200 @@ -192,7 +192,7 @@ # Perform a pass to collect metadata. This validates we can open all # source files and allows a unified progress bar to be displayed. - for unencoded, encoded, size in alldatafiles: + for revlog_type, unencoded, encoded, size in alldatafiles: if not unencoded.endswith(b'.i'): continue
--- a/mercurial/verify.py Mon Apr 05 23:54:54 2021 -0400 +++ b/mercurial/verify.py Tue Apr 06 10:38:03 2021 +0200 @@ -416,7 +416,7 @@ storefiles = set() subdirs = set() revlogv1 = self.revlogv1 - for f, f2, size in repo.store.datafiles(): + for t, f, f2, size in repo.store.datafiles(): if not f: self._err(None, _(b"cannot decode filename '%s'") % f2) elif (size > 0 or not revlogv1) and f.startswith(b'meta/'): @@ -480,7 +480,7 @@ ui.status(_(b"checking files\n")) storefiles = set() - for f, f2, size in repo.store.datafiles(): + for rl_type, f, f2, size in repo.store.datafiles(): if not f: self._err(None, _(b"cannot decode filename '%s'") % f2) elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
--- a/mercurial/wireprotov2server.py Mon Apr 05 23:54:54 2021 -0400 +++ b/mercurial/wireprotov2server.py Tue Apr 06 10:38:03 2021 +0200 @@ -1582,7 +1582,8 @@ # TODO this is a bunch of storage layer interface abstractions because # it assumes revlogs. - for name, encodedname, size in topfiles: + for rl_type, name, encodedname, size in topfiles: + # XXX use the `rl_type` for that if b'changelog' in files and name.startswith(b'00changelog'): pass elif b'manifestlog' in files and name.startswith(b'00manifest'):
--- a/tests/test-persistent-nodemap.t Mon Apr 05 23:54:54 2021 -0400 +++ b/tests/test-persistent-nodemap.t Tue Apr 06 10:38:03 2021 +0200 @@ -754,15 +754,15 @@ $ hg clone -U --stream --config ui.ssh="\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/test-repo stream-clone --debug | egrep '00(changelog|manifest)' adding [s] 00manifest.n (70 bytes) - adding [s] 00manifest.i (313 KB) adding [s] 00manifest.d (452 KB) (no-zstd !) adding [s] 00manifest.d (491 KB) (zstd !) adding [s] 00manifest-*.nd (118 KB) (glob) adding [s] 00changelog.n (70 bytes) - adding [s] 00changelog.i (313 KB) adding [s] 00changelog.d (360 KB) (no-zstd !) adding [s] 00changelog.d (368 KB) (zstd !) adding [s] 00changelog-*.nd (118 KB) (glob) + adding [s] 00manifest.i (313 KB) + adding [s] 00changelog.i (313 KB) $ ls -1 stream-clone/.hg/store/ | egrep '00(changelog|manifest)(\.n|-.*\.nd)' 00changelog-*.nd (glob) 00changelog.n