Mercurial > hg
changeset 50479:5217e36356bb
store: add logic to group revlog files together
For now each file still gets its own entry; this change will help put an end to that, soon™.
We use such gathering in the `basicstore` code.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 15 May 2023 08:58:33 +0200 |
parents | 1c0244a8cdaf |
children | d4f54aded22e |
files | mercurial/store.py |
diffstat | 1 files changed, 36 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/store.py Mon May 15 08:58:16 2023 +0200
+++ b/mercurial/store.py Mon May 15 08:58:33 2023 +0200
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-
+import collections
 import functools
 import os
 import re
@@ -395,6 +395,13 @@
     b'.nd',
     b'.sda',
 )
+# file extension that also use a `-SOMELONGIDHASH.ext` form
+REVLOG_FILES_LONG_EXT = (
+    b'.nd',
+    b'.idx',
+    b'.dat',
+    b'.sda',
+)
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" file are nodemap data and won't "change" but they might be
@@ -532,6 +539,30 @@
             return 0
 
 
+def _gather_revlog(files_data):
+    """group files per revlog prefix
+
+    The returns a two level nested dict. The top level key is the revlog prefix
+    without extension, the second level is all the file "suffix" that were
+    seen for this revlog and arbitrary file data as value.
+    """
+    revlogs = collections.defaultdict(dict)
+    for u, value in files_data:
+        name, ext = _split_revlog_ext(u)
+        revlogs[name][ext] = value
+    return sorted(revlogs.items())
+
+
+def _split_revlog_ext(filename):
+    """split the revlog file prefix from the variable extension"""
+    if filename.endswith(REVLOG_FILES_LONG_EXT):
+        char = b'-'
+    else:
+        char = b'.'
+    idx = filename.rfind(char)
+    return filename[:idx], filename[idx:]
+
+
 class basicstore:
     '''base class for local repository stores'''
 
@@ -592,8 +623,10 @@
         be a list and the filenames that can't be decoded are added
         to it instead. This is very rarely needed."""
         files = self._walk(b'data', True) + self._walk(b'meta', True)
-        for u, (t, s) in files:
-            if t is not None:
+        files = (f for f in files if f[1][0] is not None)
+        for revlog, details in _gather_revlog(files):
+            for ext, (t, s) in sorted(details.items()):
+                u = revlog + ext
                 yield RevlogStoreEntry(
                     unencoded_path=u,
                     revlog_type=FILEFLAGS_FILELOG,