changeset 51094:de6a8cc24de3

revlog: move the splitting-inline-revlog logic inside the inner object This is another large IO block that we need to move within the inner object if we want it to be self-sufficient.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 23 Oct 2023 14:27:07 +0200
parents c2c24b6b97f5
children a82704902db8
files mercurial/revlog.py
diffstat 1 files changed, 74 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlog.py	Wed Oct 25 01:02:47 2023 +0200
+++ b/mercurial/revlog.py	Mon Oct 23 14:27:07 2023 +0200
@@ -518,6 +518,64 @@
             atomictemp=True,
         )
 
+    def split_inline(self, tr, header, new_index_file_path=None):
+        """split the data of an inline revlog into an index and a data file"""
+        existing_handles = False
+        if self._writinghandles is not None:
+            existing_handles = True
+            fp = self._writinghandles[0]
+            fp.flush()
+            fp.close()
+            # We can't use the cached file handle after close(). So prevent
+            # its usage.
+            self._writinghandles = None
+            self._segmentfile.writing_handle = None
+            # No need to deal with sidedata writing handle as it is only
+            # relevant with revlog-v2 which is never inline, not reaching
+            # this code
+
+        new_dfh = self.opener(self.data_file, mode=b"w+")
+        new_dfh.truncate(0)  # drop any potentially existing data
+        try:
+            with self.reading():
+                for r in range(len(self.index)):
+                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
+                new_dfh.flush()
+
+            if new_index_file_path is not None:
+                self.index_file = new_index_file_path
+            with self.__index_new_fp() as fp:
+                self.inline = False
+                for i in range(len(self.index)):
+                    e = self.index.entry_binary(i)
+                    if i == 0:
+                        packed_header = self.index.pack_header(header)
+                        e = packed_header + e
+                    fp.write(e)
+
+                # If we don't use side-write, the temp file replaces the real
+                # index when we exit the context manager
+
+            self._segmentfile = randomaccessfile.randomaccessfile(
+                self.opener,
+                self.data_file,
+                self.data_config.chunk_cache_size,
+            )
+
+            if existing_handles:
+                # switched from inline to conventional; reopen the index
+                ifh = self.__index_write_fp()
+                self._writinghandles = (ifh, new_dfh, None)
+                self._segmentfile.writing_handle = new_dfh
+                new_dfh = None
+                # No need to deal with sidedata writing handle as it is only
+                # relevant with revlog-v2 which is never inline, not reaching
+                # this code
+        finally:
+            if new_dfh is not None:
+                new_dfh.close()
+        return self.index_file
+
     def get_segment_for_revs(self, startrev, endrev):
         """Obtain a segment of raw data corresponding to a range of revisions.
 
@@ -2586,26 +2644,15 @@
             tr.addbackup(self._indexfile, for_offset=True)
         tr.add(self._datafile, 0)
 
-        existing_handles = False
-        if self._inner._writinghandles is not None:
-            existing_handles = True
-            fp = self._inner._writinghandles[0]
-            fp.flush()
-            fp.close()
-            # We can't use the cached file handle after close(). So prevent
-            # its usage.
-            self._inner._writinghandles = None
-            self._inner._segmentfile.writing_handle = None
-            # No need to deal with sidedata writing handle as it is only
-            # relevant with revlog-v2 which is never inline, not reaching
-            # this code
+        new_index_file_path = None
         if side_write:
             old_index_file_path = self._indexfile
             new_index_file_path = self._split_index_file
             opener = self.opener
             weak_self = weakref.ref(self)
 
-            # the "split" index replace the real index when the transaction is finalized
+            # the "split" index replaces the real index when the transaction is
+            # finalized
             def finalize_callback(tr):
                 opener.rename(
                     new_index_file_path,
@@ -2621,6 +2668,7 @@
                 maybe_self = weak_self()
                 if maybe_self is not None:
                     maybe_self._indexfile = old_index_file_path
+                    maybe_self._inner.inline = True
                     maybe_self._inner.index_file = old_index_file_path
 
             tr.registertmp(new_index_file_path)
@@ -2631,54 +2679,18 @@
             tr.addfinalize(callback_id, finalize_callback)
             tr.addabort(callback_id, abort_callback)
 
-        new_dfh = self._datafp(b'w+')
-        new_dfh.truncate(0)  # drop any potentially existing data
-        try:
-            with self.reading():
-                for r in self:
-                    new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
-                new_dfh.flush()
-
-            if side_write:
-                self._indexfile = new_index_file_path
-                self._inner.index_file = self._indexfile
-            with self._inner._InnerRevlog__index_new_fp() as fp:
-                self._format_flags &= ~FLAG_INLINE_DATA
-                self._inline = False
-                self._inner.inline = False
-                for i in self:
-                    e = self.index.entry_binary(i)
-                    if i == 0:
-                        header = self._format_flags | self._format_version
-                        header = self.index.pack_header(header)
-                        e = header + e
-                    fp.write(e)
-
-                # If we don't use side-write, the temp file replace the real
-                # index when we exit the context manager
-
-            nodemaputil.setup_persistent_nodemap(tr, self)
-            self._inner._segmentfile = randomaccessfile.randomaccessfile(
-                self.opener,
-                self._datafile,
-                self.data_config.chunk_cache_size,
-            )
-
-            if existing_handles:
-                # switched from inline to conventional reopen the index
-                index_end = None
-                ifh = self._inner._InnerRevlog__index_write_fp(
-                    index_end=index_end
-                )
-                self._inner._writinghandles = (ifh, new_dfh, None)
-                self._inner._segmentfile.writing_handle = new_dfh
-                new_dfh = None
-                # No need to deal with sidedata writing handle as it is only
-                # relevant with revlog-v2 which is never inline, not reaching
-                # this code
-        finally:
-            if new_dfh is not None:
-                new_dfh.close()
+        self._format_flags &= ~FLAG_INLINE_DATA
+        self._inner.split_inline(
+            tr,
+            self._format_flags | self._format_version,
+            new_index_file_path=new_index_file_path,
+        )
+
+        self._inline = False
+        if new_index_file_path is not None:
+            self._indexfile = new_index_file_path
+
+        nodemaputil.setup_persistent_nodemap(tr, self)
 
     def _nodeduplicatecallback(self, transaction, node):
         """called when trying to add a node already stored."""