store: use a StoreEntry object instead of tuple for store files
We want to make the store return more semantic information instead of a stream
of file paths. To achieve this, we start by adding a simple object that holds
the same information as the tuple it replaces, and do a simple update to the
user code to fetch and use the same information.
From there, we will be able to iteratively upgrade the codebase toward better
objects.
--- a/hgext/largefiles/lfutil.py Mon May 15 08:56:08 2023 +0200
+++ b/hgext/largefiles/lfutil.py Mon May 15 08:56:23 2023 +0200
@@ -552,7 +552,8 @@
def islfilesrepo(repo):
'''Return true if the repo is a largefile repo.'''
if b'largefiles' in repo.requirements and any(
- shortnameslash in f[1] for f in repo.store.datafiles()
+ shortnameslash in entry.unencoded_path
+ for entry in repo.store.datafiles()
):
return True
--- a/hgext/largefiles/reposetup.py Mon May 15 08:56:08 2023 +0200
+++ b/hgext/largefiles/reposetup.py Mon May 15 08:56:23 2023 +0200
@@ -458,7 +458,8 @@
def checkrequireslfiles(ui, repo, **kwargs):
with repo.lock():
if b'largefiles' not in repo.requirements and any(
- lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
+ lfutil.shortname + b'/' in entry.unencoded_path
+ for entry in repo.store.datafiles()
):
repo.requirements.add(b'largefiles')
scmutil.writereporequirements(repo)
--- a/hgext/narrow/narrowcommands.py Mon May 15 08:56:08 2023 +0200
+++ b/hgext/narrow/narrowcommands.py Mon May 15 08:56:23 2023 +0200
@@ -288,7 +288,8 @@
repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
todelete = []
- for t, f, size in repo.store.datafiles():
+ for entry in repo.store.datafiles():
+ f = entry.unencoded_path
if f.startswith(b'data/'):
file = f[5:-2]
if not newmatch(file):
--- a/hgext/remotefilelog/remotefilelogserver.py Mon May 15 08:56:08 2023 +0200
+++ b/hgext/remotefilelog/remotefilelogserver.py Mon May 15 08:56:23 2023 +0200
@@ -162,25 +162,34 @@
):
n = util.pconvert(fp[striplen:])
d = store.decodedir(n)
- t = store.FILETYPE_OTHER
- yield (t, d, st.st_size)
+ yield store.StoreEntry(
+ unencoded_path=d,
+ is_revlog=True,
+ revlog_type=None,
+ is_revlog_main=False,
+ is_volatile=False,
+ file_size=st.st_size,
+ )
+
if kind == stat.S_IFDIR:
visit.append(fp)
if scmutil.istreemanifest(repo):
- for (t, u, s) in repo.store.datafiles():
+ for entry in repo.store.datafiles():
+ u = entry.unencoded_path
if u.startswith(b'meta/') and (
u.endswith(b'.i') or u.endswith(b'.d')
):
- yield (t, u, s)
+ yield entry
# Return .d and .i files that do not match the shallow pattern
match = state.match
if match and not match.always():
- for (t, u, s) in repo.store.datafiles():
+ for entry in repo.store.datafiles():
+ u = entry.unencoded_path
f = u[5:-2] # trim data/... and .i/.d
if not state.match(f):
- yield (t, u, s)
+ yield entry
for x in repo.store.topfiles():
if state.noflatmf and x[1][:11] == b'00manifest.':
--- a/mercurial/repair.py Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/repair.py Mon May 15 08:56:23 2023 +0200
@@ -444,7 +444,9 @@
if scmutil.istreemanifest(repo):
# This logic is safe if treemanifest isn't enabled, but also
# pointless, so we skip it if treemanifest isn't enabled.
- for t, unencoded, size in repo.store.datafiles():
+ for entry in repo.store.datafiles():
+ unencoded = entry.unencoded_path
+ # XXX use the entry.revlog_type instead
if unencoded.startswith(b'meta/') and unencoded.endswith(
b'00manifest.i'
):
--- a/mercurial/revlogutils/rewrite.py Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/revlogutils/rewrite.py Mon May 15 08:56:23 2023 +0200
@@ -825,9 +825,13 @@
with context():
files = list(
- (file_type, path)
- for (file_type, path, _s) in repo.store.datafiles()
- if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
+ entry
+ for entry in repo.store.datafiles()
+ if (
+ entry.unencoded_path.endswith(b'.i')
+ and entry.is_revlog
+ and entry.revlog_type == store.FILEFLAGS_FILELOG
+ )
)
progress = ui.makeprogress(
@@ -837,7 +841,8 @@
)
found_nothing = True
- for file_type, path in files:
+ for entry in files:
+ path = entry.unencoded_path
progress.increment()
filename = _get_filename_from_filelog_index(path)
fl = _filelog_from_filename(repo, filename)
--- a/mercurial/store.py Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/store.py Mon May 15 08:56:23 2023 +0200
@@ -10,9 +10,11 @@
import os
import re
import stat
+from typing import Generator
from .i18n import _
from .pycompat import getattr
+from .thirdparty import attr
from .node import hex
from . import (
changelog,
@@ -451,6 +453,20 @@
FILETYPE_OTHER = FILEFLAGS_OTHER
+@attr.s(slots=True)
+class StoreEntry:
+ """An entry in the store
+
+ This is returned by `store.walk` and represents some data in the store."""
+
+ unencoded_path = attr.ib()
+ is_revlog = attr.ib(default=False)
+ revlog_type = attr.ib(default=None)
+ is_revlog_main = attr.ib(default=None)
+ is_volatile = attr.ib(default=False)
+ file_size = attr.ib(default=None)
+
+
class basicstore:
'''base class for local repository stores'''
@@ -500,7 +516,9 @@
rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
- def datafiles(self, matcher=None, undecodable=None):
+ def datafiles(
+ self, matcher=None, undecodable=None
+ ) -> Generator[StoreEntry, None, None]:
"""Like walk, but excluding the changelog and root manifest.
When [undecodable] is None, revlogs names that can't be
@@ -510,20 +528,35 @@
files = self._walk(b'data', True) + self._walk(b'meta', True)
for (t, u, s) in files:
if t is not None:
- yield (FILEFLAGS_FILELOG | t, u, s)
+ yield StoreEntry(
+ unencoded_path=u,
+ is_revlog=True,
+ revlog_type=FILEFLAGS_FILELOG,
+ is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+ is_volatile=bool(t & FILEFLAGS_VOLATILE),
+ file_size=s,
+ )
- def topfiles(self):
+ def topfiles(self) -> Generator[StoreEntry, None, None]:
# yield manifest before changelog
files = reversed(self._walk(b'', False))
for (t, u, s) in files:
if u.startswith(b'00changelog'):
- yield (FILEFLAGS_CHANGELOG | t, u, s)
+ revlog_type = FILEFLAGS_CHANGELOG
elif u.startswith(b'00manifest'):
- yield (FILEFLAGS_MANIFESTLOG | t, u, s)
+ revlog_type = FILEFLAGS_MANIFESTLOG
else:
- yield (FILETYPE_OTHER | t, u, s)
+ revlog_type = None
+ yield StoreEntry(
+ unencoded_path=u,
+ is_revlog=revlog_type is not None,
+ revlog_type=revlog_type,
+ is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+ is_volatile=bool(t & FILEFLAGS_VOLATILE),
+ file_size=s,
+ )
- def walk(self, matcher=None):
+ def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
"""return files related to data storage (ie: revlogs)
yields (file_type, unencoded, size)
@@ -576,9 +609,12 @@
# However that might change so we should probably add a test and encoding
# decoding for it too. see issue6548
- def datafiles(self, matcher=None, undecodable=None):
- for t, f1, size in super(encodedstore, self).datafiles():
+ def datafiles(
+ self, matcher=None, undecodable=None
+ ) -> Generator[StoreEntry, None, None]:
+ for entry in super(encodedstore, self).datafiles():
try:
+ f1 = entry.unencoded_path
f2 = decodefilename(f1)
except KeyError:
if undecodable is None:
@@ -589,7 +625,8 @@
continue
if not _matchtrackedpath(f2, matcher):
continue
- yield t, f2, size
+ entry.unencoded_path = f2
+ yield entry
def join(self, f):
return self.path + b'/' + encodefilename(f)
@@ -785,7 +822,9 @@
def getsize(self, path):
return self.rawvfs.stat(path).st_size
- def datafiles(self, matcher=None, undecodable=None):
+ def datafiles(
+ self, matcher=None, undecodable=None
+ ) -> Generator[StoreEntry, None, None]:
for f in sorted(self.fncache):
if not _matchtrackedpath(f, matcher):
continue
@@ -799,7 +838,14 @@
continue
t |= FILEFLAGS_FILELOG
try:
- yield t, f, self.getsize(ef)
+ yield StoreEntry(
+ unencoded_path=f,
+ is_revlog=True,
+ revlog_type=FILEFLAGS_FILELOG,
+ is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+ is_volatile=bool(t & FILEFLAGS_VOLATILE),
+ file_size=self.getsize(ef),
+ )
except FileNotFoundError:
pass
--- a/mercurial/streamclone.py Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/streamclone.py Mon May 15 08:56:23 2023 +0200
@@ -269,10 +269,10 @@
# Get consistent snapshot of repo, lock during scan.
with repo.lock():
repo.ui.debug(b'scanning\n')
- for file_type, name, size in _walkstreamfiles(repo):
- if size:
- entries.append((name, size))
- total_bytes += size
+ for entry in _walkstreamfiles(repo):
+ if entry.file_size:
+ entries.append((entry.unencoded_path, entry.file_size))
+ total_bytes += entry.file_size
_test_sync_point_walk_1(repo)
_test_sync_point_walk_2(repo)
@@ -677,13 +677,15 @@
if includes or excludes:
matcher = narrowspec.match(repo.root, includes, excludes)
- for rl_type, name, size in _walkstreamfiles(repo, matcher):
- if size:
+ for entry in _walkstreamfiles(repo, matcher):
+ if entry.file_size:
ft = _fileappend
- if rl_type & store.FILEFLAGS_VOLATILE:
+ if entry.is_volatile:
ft = _filefull
- entries.append((_srcstore, name, ft, size))
- totalfilesize += size
+ entries.append(
+ (_srcstore, entry.unencoded_path, ft, entry.file_size)
+ )
+ totalfilesize += entry.file_size
for name in _walkstreamfullstorefiles(repo):
if repo.svfs.exists(name):
totalfilesize += repo.svfs.lstat(name).st_size
--- a/mercurial/upgrade_utils/engine.py Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/upgrade_utils/engine.py Mon May 15 08:56:23 2023 +0200
@@ -200,9 +200,10 @@
# Perform a pass to collect metadata. This validates we can open all
# source files and allows a unified progress bar to be displayed.
- for rl_type, unencoded, size in alldatafiles:
- if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
+ for entry in alldatafiles:
+ if not (entry.is_revlog and entry.is_revlog_main):
continue
+ unencoded = entry.unencoded_path
# the store.walk function will wrongly pickup transaction backup and
# get confused. As a quick fix for 5.9 release, we ignore those.
@@ -215,7 +216,7 @@
if unencoded in skip_undo:
continue
- rl = _revlogfrompath(srcrepo, rl_type, unencoded)
+ rl = _revlogfrompath(srcrepo, entry.revlog_type, unencoded)
info = rl.storageinfo(
exclusivefiles=True,
@@ -232,19 +233,19 @@
srcrawsize += rawsize
# This is for the separate progress bars.
- if rl_type & store.FILEFLAGS_CHANGELOG:
- changelogs[unencoded] = rl_type
+ if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
+ changelogs[unencoded] = entry.revlog_type
crevcount += len(rl)
csrcsize += datasize
crawsize += rawsize
- elif rl_type & store.FILEFLAGS_MANIFESTLOG:
- manifests[unencoded] = rl_type
+ elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
+ manifests[unencoded] = entry.revlog_type
mcount += 1
mrevcount += len(rl)
msrcsize += datasize
mrawsize += rawsize
- elif rl_type & store.FILEFLAGS_FILELOG:
- filelogs[unencoded] = rl_type
+ elif entry.revlog_type & store.FILEFLAGS_FILELOG:
+ filelogs[unencoded] = entry.revlog_type
fcount += 1
frevcount += len(rl)
fsrcsize += datasize
--- a/mercurial/verify.py Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/verify.py Mon May 15 08:56:23 2023 +0200
@@ -407,7 +407,9 @@
subdirs = set()
revlogv1 = self.revlogv1
undecodable = []
- for t, f, size in repo.store.datafiles(undecodable=undecodable):
+ for entry in repo.store.datafiles(undecodable=undecodable):
+ f = entry.unencoded_path
+ size = entry.file_size
if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
storefiles.add(_normpath(f))
subdirs.add(os.path.dirname(f))
@@ -472,7 +474,9 @@
storefiles = set()
undecodable = []
- for t, f, size in repo.store.datafiles(undecodable=undecodable):
+ for entry in repo.store.datafiles(undecodable=undecodable):
+ size = entry.file_size
+ f = entry.unencoded_path
if (size > 0 or not revlogv1) and f.startswith(b'data/'):
storefiles.add(_normpath(f))
for f in undecodable: