changeset 43525:845e5b313783

revlog: move the nodemap into the index object (for pure) This makes the pure code closer to the C extension one. The ultimate goal is to merge the two into a single object and offer a unified API. This changeset focuses on gathering the data on the same object. For now the code for the `revlogoldindex` and `BaseIndexObject` index objects is quite similar. However, there will be larger divergence later on, so I don't think it is worth creating a base class. This work is part of a refactoring to unify the revlog index and the nodemap. This unification prepares for the use of a persistent nodemap. Differential Revision: https://phab.mercurial-scm.org/D7313
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 06 Nov 2019 14:13:19 +0100
parents a7c0c5b5a50f
children e258ad110488
files mercurial/pure/parsers.py mercurial/revlog.py
diffstat 2 files changed, 39 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/pure/parsers.py	Wed Nov 06 14:13:19 2019 +0100
+++ b/mercurial/pure/parsers.py	Wed Nov 06 14:13:19 2019 +0100
@@ -10,8 +10,12 @@
 import struct
 import zlib
 
-from ..node import nullid
-from .. import pycompat
+from ..node import nullid, nullrev
+from .. import (
+    pycompat,
+    revlogutils,
+    util,
+)
 
 stringio = pycompat.bytesio
 
@@ -43,6 +47,17 @@
 
 
 class BaseIndexObject(object):
+    @util.propertycache
+    def nodemap(self):
+        nodemap = revlogutils.NodeMap({nullid: nullrev})
+        for r in range(0, len(self)):
+            n = self[r][7]
+            nodemap[n] = r
+        return nodemap
+
+    def clearcaches(self):
+        self.__dict__.pop('nodemap', None)
+
     def __len__(self):
         return self._lgt + len(self._extra)
 
--- a/mercurial/revlog.py	Wed Nov 06 14:13:19 2019 +0100
+++ b/mercurial/revlog.py	Wed Nov 06 14:13:19 2019 +0100
@@ -205,6 +205,17 @@
 
 
 class revlogoldindex(list):
+    @util.propertycache
+    def nodemap(self):
+        nodemap = revlogutils.NodeMap({nullid: nullrev})
+        for r in range(0, len(self)):
+            n = self[r][7]
+            nodemap[n] = r
+        return nodemap
+
+    def clearcaches(self):
+        self.__dict__.pop('nodemap', None)
+
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
@@ -240,7 +251,8 @@
             nodemap[e[6]] = n
             n += 1
 
-        return revlogoldindex(index), nodemap, None
+        index = revlogoldindex(index)
+        return index, index.nodemap, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
@@ -287,7 +299,7 @@
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
-        return index, getattr(index, 'nodemap', None), cache
+        return index, index.nodemap, cache
 
     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
@@ -372,11 +384,11 @@
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
-        self.index = []
+        self.index = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
-        self._nodecache = revlogutils.NodeMap({nullid: nullrev})
+        self._nodecache = None
         self._nodepos = None
         self._compengine = b'zlib'
         self._compengineopts = {}
@@ -541,8 +553,7 @@
                 _(b"index %s is corrupted") % self.indexfile
             )
         self.index, nodemap, self._chunkcache = d
-        if nodemap is not None:
-            self.nodemap = self._nodecache = nodemap
+        self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
@@ -646,15 +657,7 @@
         self._chainbasecache.clear()
         self._chunkcache = (0, b'')
         self._pcache = {}
-
-        try:
-            # If we are using the native C version, you are in a fun case
-            # where self.index, self.nodemap and self._nodecaches is the same
-            # object.
-            self._nodecache.clearcaches()
-        except AttributeError:
-            self._nodecache = revlogutils.NodeMap({nullid: nullrev})
-            self._nodepos = None
+        self.index.clearcaches()
 
     def rev(self, node):
         try:
@@ -662,29 +665,10 @@
         except TypeError:
             raise
         except error.RevlogError:
-            if not isinstance(self._nodecache, revlogutils.NodeMap):
-                # parsers.c radix tree lookup failed
-                if node == wdirid or node in wdirfilenodeids:
-                    raise error.WdirUnsupported
-                raise error.LookupError(node, self.indexfile, _(b'no node'))
-            else:
-                # pure python cache lookup failed
-                n = self._nodecache
-                i = self.index
-                p = self._nodepos
-                if p is None:
-                    p = len(i) - 1
-                else:
-                    assert p < len(i)
-                for r in pycompat.xrange(p, -1, -1):
-                    v = i[r][7]
-                    n[v] = r
-                    if v == node:
-                        self._nodepos = r - 1
-                        return r
-                if node == wdirid or node in wdirfilenodeids:
-                    raise error.WdirUnsupported
-                raise error.LookupError(node, self.indexfile, _(b'no node'))
+            # parsers.c radix tree lookup failed
+            if node == wdirid or node in wdirfilenodeids:
+                raise error.WdirUnsupported
+            raise error.LookupError(node, self.indexfile, _(b'no node'))
 
     # Accessors for index entries.