diff mercurial/store.py @ 50640:9caa860dcbec

stream-clone: implement decidated `get_streams` method for revlog For revlog, we can do better using the maximum linkrev expected. This approach open the way to dealing with a much larger set of non-trivial changes, like splitting of inline revlogs. We will actually tackle this issue in the next changesets (thanks to this one).
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 29 May 2023 14:07:58 +0200
parents 5e60abf811f3
children 5460424092e2
line wrap: on
line diff
--- a/mercurial/store.py	Sun May 28 05:52:58 2023 +0200
+++ b/mercurial/store.py	Mon May 29 14:07:58 2023 +0200
@@ -509,7 +509,13 @@
     def files(self) -> List[StoreFile]:
         raise NotImplementedError
 
-    def get_streams(self, vfs, copies=None):
+    def get_streams(
+        self,
+        repo=None,
+        vfs=None,
+        copies=None,
+        max_changeset=None,
+    ):
         """return a list of data stream associated to files for this entry
 
         return [(unencoded_file_path, content_iterator, content_size), …]
@@ -605,6 +611,57 @@
                 self._files.append(StoreFile(unencoded_path=path, **data))
         return self._files
 
+    def get_streams(
+        self,
+        repo=None,
+        vfs=None,
+        copies=None,
+        max_changeset=None,
+    ):
+        if repo is None or max_changeset is None:
+            return super().get_streams(
+                repo=repo,
+                vfs=vfs,
+                copies=copies,
+                max_changeset=max_changeset,
+            )
+        if any(k.endswith(b'.idx') for k in self._details.keys()):
+            # This use revlog-v2, ignore for now
+            return super().get_streams(
+                repo=repo,
+                vfs=vfs,
+                copies=copies,
+                max_changeset=max_changeset,
+            )
+        name_to_ext = {}
+        for ext in self._details.keys():
+            name_to_ext[self._path_prefix + ext] = ext
+        name_to_size = {}
+        for f in self.files():
+            name_to_size[f.unencoded_path] = f.file_size(None)
+        stream = [
+            f.get_stream(vfs, copies)
+            for f in self.files()
+            if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
+        ]
+
+        rl = self.get_revlog_instance(repo).get_revlog()
+        rl_stream = rl.get_streams(max_changeset)
+        for name, s, size in rl_stream:
+            if name_to_size.get(name, 0) != size:
+                msg = _(b"expected %d bytes but %d provided for %s")
+                msg %= name_to_size.get(name, 0), size, name
+                raise error.Abort(msg)
+        stream.extend(rl_stream)
+        files = self.files()
+        assert len(stream) == len(files), (
+            stream,
+            files,
+            self._path_prefix,
+            self.target_id,
+        )
+        return stream
+
     def get_revlog_instance(self, repo):
         """Obtain a revlog instance from this store entry