changeset 50479:5217e36356bb

store: add logic to group revlog files together. For now each file gets its own entry; this will help us stop doing that, soon™. We use such gathering in the `basicstore` code.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 15 May 2023 08:58:33 +0200
parents 1c0244a8cdaf
children d4f54aded22e
files mercurial/store.py
diffstat 1 files changed, 36 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/store.py	Mon May 15 08:58:16 2023 +0200
+++ b/mercurial/store.py	Mon May 15 08:58:33 2023 +0200
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-
+import collections
 import functools
 import os
 import re
@@ -395,6 +395,13 @@
     b'.nd',
     b'.sda',
 )
+# file extensions that also use a `-SOMELONGIDHASH.ext` form
+REVLOG_FILES_LONG_EXT = (
+    b'.nd',
+    b'.idx',
+    b'.dat',
+    b'.sda',
+)
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" file are nodemap data and won't "change" but they might be
@@ -532,6 +539,30 @@
             return 0
 
 
+def _gather_revlog(files_data):
+    """group files per revlog prefix
+
+    Returns a sorted list of `(prefix, details)` pairs. `prefix` is the revlog
+    path without its extension; `details` is a dict mapping each file "suffix"
+    seen for this revlog to the arbitrary file data supplied with it.
+    """
+    revlogs = collections.defaultdict(dict)
+    for u, value in files_data:
+        name, ext = _split_revlog_ext(u)
+        revlogs[name][ext] = value
+    return sorted(revlogs.items())
+
+
+def _split_revlog_ext(filename):
+    """split the revlog file prefix from the variable extension"""
+    if filename.endswith(REVLOG_FILES_LONG_EXT):
+        char = b'-'
+    else:
+        char = b'.'
+    idx = filename.rfind(char)
+    return filename[:idx], filename[idx:]
+
+
 class basicstore:
     '''base class for local repository stores'''
 
@@ -592,8 +623,10 @@
         be a list and the filenames that can't be decoded are added
         to it instead. This is very rarely needed."""
         files = self._walk(b'data', True) + self._walk(b'meta', True)
-        for u, (t, s) in files:
-            if t is not None:
+        files = (f for f in files if f[1][0] is not None)
+        for revlog, details in _gather_revlog(files):
+            for ext, (t, s) in sorted(details.items()):
+                u = revlog + ext
                 yield RevlogStoreEntry(
                     unencoded_path=u,
                     revlog_type=FILEFLAGS_FILELOG,