revlog: move _getsegmentforrevs on the internal object
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 24 Oct 2023 17:03:27 +0200
changeset 51091 df50a1592e0c
parent 51090 c3748f38dcd0
child 51092 31f143448704
revlog: move _getsegmentforrevs on the internal object. See inline documentation for details.
contrib/perf.py
mercurial/revlog.py
mercurial/revlogutils/debug.py
mercurial/revlogutils/rewrite.py
--- a/contrib/perf.py	Tue Oct 17 06:02:33 2023 +0200
+++ b/contrib/perf.py	Tue Oct 24 17:03:27 2023 +0200
@@ -3720,11 +3720,15 @@
 
     rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
 
-    # _chunkraw was renamed to _getsegmentforrevs.
+    # - _chunkraw was renamed to _getsegmentforrevs
+    # - _getsegmentforrevs was moved to the inner object
     try:
-        segmentforrevs = rl._getsegmentforrevs
+        segmentforrevs = rl._inner.get_segment_for_revs
     except AttributeError:
-        segmentforrevs = rl._chunkraw
+        try:
+            segmentforrevs = rl._getsegmentforrevs
+        except AttributeError:
+            segmentforrevs = rl._chunkraw
 
     # Verify engines argument.
     if engines:
@@ -3894,9 +3898,12 @@
 
     # _chunkraw was renamed to _getsegmentforrevs.
     try:
-        segmentforrevs = r._getsegmentforrevs
+        segmentforrevs = r._inner.get_segment_for_revs
     except AttributeError:
-        segmentforrevs = r._chunkraw
+        try:
+            segmentforrevs = r._getsegmentforrevs
+        except AttributeError:
+            segmentforrevs = r._chunkraw
 
     node = r.lookup(rev)
     rev = r.rev(node)
--- a/mercurial/revlog.py	Tue Oct 17 06:02:33 2023 +0200
+++ b/mercurial/revlog.py	Tue Oct 24 17:03:27 2023 +0200
@@ -508,6 +508,43 @@
             atomictemp=True,
         )
 
+    def get_segment_for_revs(self, startrev, endrev):
+        """Obtain a segment of raw data corresponding to a range of revisions.
+
+        Accepts the start and end revisions (both inclusive). The data is
+        read through ``self._segmentfile``; this method takes no file
+        handle argument.
+
+        Requests for data may be satisfied by a cache.
+
+        Returns a 2-tuple of (offset, data) for the requested range of
+        revisions. Offset is the integer offset from the beginning of the
+        revlog and data is a str or buffer of the raw byte data.
+
+        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
+        to determine where each revision's data begins and ends.
+
+        API: we should consider making this a private part of the InnerRevlog
+        at some point.
+        """
+        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
+        # (functions are expensive).
+        index = self.index
+        istart = index[startrev]
+        start = int(istart[0] >> 16)
+        if startrev == endrev:
+            end = start + istart[1]
+        else:
+            iend = index[endrev]
+            end = int(iend[0] >> 16) + iend[1]
+
+        if self.inline:
+            start += (startrev + 1) * self.index.entry_size
+            end += (endrev + 1) * self.index.entry_size
+        length = end - start
+
+        return start, self._segmentfile.read_chunk(start, length)
+
 
 class revlog:
     """
@@ -1000,7 +1037,7 @@
                             header = self.index.pack_header(header)
                             idx = header + idx
                         yield idx
-                        yield self._getsegmentforrevs(rev, rev)[1]
+                        yield self._inner.get_segment_for_revs(rev, rev)[1]
 
             inline_stream = get_stream()
             next(inline_stream)
@@ -2153,40 +2190,6 @@
         p1, p2 = self.parents(node)
         return storageutil.hashrevisionsha1(text, p1, p2) != node
 
-    def _getsegmentforrevs(self, startrev, endrev):
-        """Obtain a segment of raw data corresponding to a range of revisions.
-
-        Accepts the start and end revisions and an optional already-open
-        file handle to be used for reading. If the file handle is read, its
-        seek position will not be preserved.
-
-        Requests for data may be satisfied by a cache.
-
-        Returns a 2-tuple of (offset, data) for the requested range of
-        revisions. Offset is the integer offset from the beginning of the
-        revlog and data is a str or buffer of the raw byte data.
-
-        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
-        to determine where each revision's data begins and ends.
-        """
-        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
-        # (functions are expensive).
-        index = self.index
-        istart = index[startrev]
-        start = int(istart[0] >> 16)
-        if startrev == endrev:
-            end = start + istart[1]
-        else:
-            iend = index[endrev]
-            end = int(iend[0] >> 16) + iend[1]
-
-        if self._inline:
-            start += (startrev + 1) * self.index.entry_size
-            end += (endrev + 1) * self.index.entry_size
-        length = end - start
-
-        return start, self._inner._segmentfile.read_chunk(start, length)
-
     def _chunk(self, rev):
         """Obtain a single decompressed chunk for a revision.
 
@@ -2197,7 +2200,7 @@
         Returns a str holding uncompressed data for the requested revision.
         """
         compression_mode = self.index[rev][10]
-        data = self._getsegmentforrevs(rev, rev)[1]
+        data = self._inner.get_segment_for_revs(rev, rev)[1]
         if compression_mode == COMP_MODE_PLAIN:
             return data
         elif compression_mode == COMP_MODE_DEFAULT:
@@ -2248,7 +2251,10 @@
                     break
 
             try:
-                offset, data = self._getsegmentforrevs(firstrev, lastrev)
+                offset, data = self._inner.get_segment_for_revs(
+                    firstrev,
+                    lastrev,
+                )
             except OverflowError:
                 # issue4215 - we can't cache a run of chunks greater than
                 # 2G on Windows
@@ -2616,7 +2622,7 @@
         try:
             with self.reading():
                 for r in self:
-                    new_dfh.write(self._getsegmentforrevs(r, r)[1])
+                    new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
                 new_dfh.flush()
 
             if side_write:
--- a/mercurial/revlogutils/debug.py	Tue Oct 17 06:02:33 2023 +0200
+++ b/mercurial/revlogutils/debug.py	Tue Oct 24 17:03:27 2023 +0200
@@ -411,8 +411,8 @@
                         numother_nad += 1
 
             # Obtain data on the raw chunks in the revlog.
-            if hasattr(r, '_getsegmentforrevs'):
-                segment = r._getsegmentforrevs(rev, rev)[1]
+            if hasattr(r, '_inner'):
+                segment = r._inner.get_segment_for_revs(rev, rev)[1]
             else:
                 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
             if segment:
--- a/mercurial/revlogutils/rewrite.py	Tue Oct 17 06:02:33 2023 +0200
+++ b/mercurial/revlogutils/rewrite.py	Tue Oct 24 17:03:27 2023 +0200
@@ -126,7 +126,8 @@
         rl.opener.rename(newrl._datafile, rl._datafile)
 
     rl.clearcaches()
-    rl._loadindex()
+    chunk_cache = rl._loadindex()
+    rl._load_inner(chunk_cache)
 
 
 def v2_censor(revlog, tr, censornode, tombstone=b''):