changeset 45219:4f0e03d980f3

dirstate: isolate node len dependency for the pure version

When switching to a 256bit hash function, this still needs adjustment,
but concentrates the change in one place.

Differential Revision: https://phab.mercurial-scm.org/D8815
author Joerg Sonnenberger <joerg@bec.de>
date Sat, 25 Jul 2020 04:29:17 +0200
parents 3ea3b85df03f
children ff59af8395a5
files mercurial/dirstate.py
diffstat 1 files changed, 13 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/dirstate.py	Fri Jul 24 08:38:19 2020 -0700
+++ b/mercurial/dirstate.py	Sat Jul 25 04:29:17 2020 +0200
@@ -1425,6 +1425,7 @@
         self._opener = opener
         self._root = root
         self._filename = b'dirstate'
+        self._nodelen = 20
 
         self._parents = None
         self._dirtyparents = False
@@ -1609,7 +1610,7 @@
         if not self._parents:
             try:
                 fp = self._opendirstatefile()
-                st = fp.read(40)
+                st = fp.read(2 * self._nodelen)
                 fp.close()
             except IOError as err:
                 if err.errno != errno.ENOENT:
@@ -1618,8 +1619,11 @@
                 st = b''
 
             l = len(st)
-            if l == 40:
-                self._parents = (st[:20], st[20:40])
+            if l == self._nodelen * 2:
+                self._parents = (
+                    st[: self._nodelen],
+                    st[self._nodelen : 2 * self._nodelen],
+                )
             elif l == 0:
                 self._parents = (nullid, nullid)
             else:
@@ -1655,15 +1659,17 @@
         if util.safehasattr(parsers, b'dict_new_presized'):
             # Make an estimate of the number of files in the dirstate based on
             # its size. From a linear regression on a set of real-world repos,
-            # all over 10,000 files, the size of a dirstate entry is 85
-            # bytes. The cost of resizing is significantly higher than the cost
+            # all over 10,000 files, the size of a dirstate entry is 2 nodes
+            # plus 45 bytes. The cost of resizing is significantly higher than the cost
             # of filling in a larger presized dict, so subtract 20% from the
             # size.
             #
             # This heuristic is imperfect in many ways, so in a future dirstate
             # format update it makes sense to just record the number of entries
             # on write.
-            self._map = parsers.dict_new_presized(len(st) // 71)
+            self._map = parsers.dict_new_presized(
+                len(st) // ((2 * self._nodelen + 45) * 4 // 5)
+            )
 
         # Python's garbage collector triggers a GC each time a certain number
         # of container objects (the number being defined by
@@ -1829,7 +1835,7 @@
             if not self._parents:
                 try:
                     fp = self._opendirstatefile()
-                    st = fp.read(40)
+                    st = fp.read(2 * self._nodelen)
                     fp.close()
                 except IOError as err:
                     if err.errno != errno.ENOENT: