changeset 47426:cac0e0621ceb

revlog: use file read caching for sidedata

The previous changeset extracted this caching logic from the revlog class into a new class. Adding a new instance of that class allows using the same logic for side data.

Differential Revision: https://phab.mercurial-scm.org/D10879
author Simon Sapin <simon.sapin@octobus.net>
date Tue, 15 Jun 2021 12:34:14 +0200
parents e0a314bcbc9d
children 6ce89165eaa0
files mercurial/changelog.py mercurial/revlog.py
diffstat 2 files changed, 29 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/changelog.py	Tue Jun 08 19:55:00 2021 +0200
+++ b/mercurial/changelog.py	Tue Jun 15 12:34:14 2021 +0200
@@ -455,6 +455,7 @@
                     self._realopener, self._indexfile, self._delaybuf
                 )
             self._segmentfile.opener = self.opener
+            self._segmentfile_sidedata.opener = self.opener
         self._delayed = True
         tr.addpending(b'cl-%i' % id(self), self._writepending)
         tr.addfinalize(b'cl-%i' % id(self), self._finalize)
@@ -464,6 +465,7 @@
         self._delayed = False
         self.opener = self._realopener
         self._segmentfile.opener = self.opener
+        self._segmentfile_sidedata.opener = self.opener
         # move redirected index data back into place
         if self._docket is not None:
             self._write_docket(tr)
@@ -504,6 +506,7 @@
             self._divert = True
             self.opener = _divertopener(self._realopener, self._indexfile)
             self._segmentfile.opener = self.opener
+            self._segmentfile_sidedata.opener = self.opener
 
         if self._divert:
             return True
--- a/mercurial/revlog.py	Tue Jun 08 19:55:00 2021 +0200
+++ b/mercurial/revlog.py	Tue Jun 15 12:34:14 2021 +0200
@@ -629,6 +629,11 @@
             self._chunkcachesize,
             chunkcache,
         )
+        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
+            self.opener,
+            self._sidedatafile,
+            self._chunkcachesize,
+        )
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
         # revlog header -> revlog compressor
@@ -782,6 +787,7 @@
         self._revisioncache = None
         self._chainbasecache.clear()
         self._segmentfile.clear_cache()
+        self._segmentfile_sidedata.clear_cache()
         self._pcache = {}
         self._nodemap_docket = None
         self.index.clearcaches()
@@ -1916,31 +1922,17 @@
         if sidedata_size == 0:
             return {}
 
-        # XXX this need caching, as we do for data
-        with self._sidedatareadfp() as sdf:
-            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
-                filename = self._sidedatafile
-                end = self._docket.sidedata_end
-                offset = sidedata_offset
-                length = sidedata_size
-                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
-                raise error.RevlogError(m)
-
-            sdf.seek(sidedata_offset, os.SEEK_SET)
-            comp_segment = sdf.read(sidedata_size)
-
-            if len(comp_segment) < sidedata_size:
-                filename = self._sidedatafile
-                length = sidedata_size
-                offset = sidedata_offset
-                got = len(comp_segment)
-                m = randomaccessfile.PARTIAL_READ_MSG % (
-                    filename,
-                    length,
-                    offset,
-                    got,
-                )
-                raise error.RevlogError(m)
+        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
+            filename = self._sidedatafile
+            end = self._docket.sidedata_end
+            offset = sidedata_offset
+            length = sidedata_size
+            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
+            raise error.RevlogError(m)
+
+        comp_segment = self._segmentfile_sidedata.read_chunk(
+            sidedata_offset, sidedata_size
+        )
 
         comp = self.index[rev][11]
         if comp == COMP_MODE_PLAIN:
@@ -2033,6 +2025,9 @@
             # its usage.
             self._writinghandles = None
             self._segmentfile.writing_handle = None
+            # No need to deal with sidedata writing handle as it is only
+            # relevant with revlog-v2 which is never inline, not reaching
+            # this code
 
         new_dfh = self._datafp(b'w+')
         new_dfh.truncate(0)  # drop any potentially existing data
@@ -2080,6 +2075,9 @@
                 self._writinghandles = (ifh, new_dfh, None)
                 self._segmentfile.writing_handle = new_dfh
                 new_dfh = None
+                # No need to deal with sidedata writing handle as it is only
+                # relevant with revlog-v2 which is never inline, not reaching
+                # this code
         finally:
             if new_dfh is not None:
                 new_dfh.close()
@@ -2138,12 +2136,14 @@
                 # exposing all file handle for writing.
                 self._writinghandles = (ifh, dfh, sdfh)
                 self._segmentfile.writing_handle = ifh if self._inline else dfh
+                self._segmentfile_sidedata.writing_handle = sdfh
                 yield
                 if self._docket is not None:
                     self._write_docket(transaction)
             finally:
                 self._writinghandles = None
                 self._segmentfile.writing_handle = None
+                self._segmentfile_sidedata.writing_handle = None
                 if dfh is not None:
                     dfh.close()
                 if sdfh is not None:
@@ -2778,6 +2778,7 @@
         self._revisioncache = None
         self._chaininfocache = util.lrucachedict(500)
         self._segmentfile.clear_cache()
+        self._segmentfile_sidedata.clear_cache()
 
         del self.index[rev:-1]