store: split the wrapping of encodedstore between _wrap and datafiles
author Pierre-Yves David <pierre-yves.david@octobus.net>
Mon, 15 May 2023 09:02:26 +0200
changeset 50493 816e8bc6e066
parent 50492 3473d18c029a
child 50494 b4953fad744e
store: split the wrapping of encodedstore between _wrap and datafiles. The `datafiles` method of `basicstore` is doing a lot of work that should be done on decoded filenames. So we now wrap `_walk` to do the decoding, and do less work in `datafiles`. This is necessary to make sure files from the same revlog can be grouped together.
mercurial/store.py
--- a/mercurial/store.py	Mon May 15 09:02:09 2023 +0200
+++ b/mercurial/store.py	Mon May 15 09:02:26 2023 +0200
@@ -602,7 +602,7 @@
     def join(self, f):
         return self.path + b'/' + encodedir(f)
 
-    def _walk(self, relpath, recurse):
+    def _walk(self, relpath, recurse, undecodable=None):
         '''yields (revlog_type, unencoded, size)'''
         path = self.path
         if relpath:
@@ -651,7 +651,7 @@
             (b'meta', FILEFLAGS_MANIFESTLOG),
         ]
         for base_dir, rl_type in dirs:
-            files = self._walk(base_dir, True)
+            files = self._walk(base_dir, True, undecodable=undecodable)
             files = (f for f in files if f[1][0] is not None)
             for revlog, details in _gather_revlog(files):
                 for ext, (t, s) in sorted(details.items()):
@@ -755,17 +755,11 @@
         self.vfs = vfsmod.filtervfs(vfs, encodefilename)
         self.opener = self.vfs
 
-    # note: topfiles would also need a decode phase. It is just that in
-    # practice we do not have any file outside of `data/` that needs encoding.
-    # However that might change so we should probably add a test and encoding
-    # decoding for it too. see issue6548
-
-    def datafiles(
-        self, matcher=None, undecodable=None
-    ) -> Generator[BaseStoreEntry, None, None]:
-        for entry in super(encodedstore, self).datafiles():
+    def _walk(self, relpath, recurse, undecodable=None):
+        old = super()._walk(relpath, recurse)
+        new = []
+        for f1, value in old:
             try:
-                f1 = entry.unencoded_path
                 f2 = decodefilename(f1)
             except KeyError:
                 if undecodable is None:
@@ -774,10 +768,16 @@
                 else:
                     undecodable.append(f1)
                     continue
-            if not _matchtrackedpath(f2, matcher):
-                continue
-            entry.unencoded_path = f2
-            yield entry
+            new.append((f2, value))
+        return new
+
+    def datafiles(
+        self, matcher=None, undecodable=None
+    ) -> Generator[BaseStoreEntry, None, None]:
+        entries = super(encodedstore, self).datafiles(undecodable=undecodable)
+        for entry in entries:
+            if _matchtrackedpath(entry.unencoded_path, matcher):
+                yield entry
 
     def join(self, f):
         return self.path + b'/' + encodefilename(f)