branchmap: add a cache validation cache, avoid expensive re-hash on every use
author Kyle Lippincott <spectral@google.com>
date Wed, 16 Sep 2020 12:13:46 -0700
changeset 45515 89f0d9f87701
parent 45514 93a0f3ba36bb
child 45516 73a5aa5e1857
branchmap: add a cache validation cache, avoid expensive re-hash on every use

In a pathological `hg log` case, we end up executing the branchmap validity
checking twice per commit displayed. Or maybe we always do, and I just noticed
because it's really slow in this repo for some reason.

Before:
```
  Time (mean ± σ):      9.816 s ±  0.071 s    [User: 9.435 s, System: 0.392 s]
  Range (min … max):    9.709 s …  9.920 s
```

After:
```
  Time (mean ± σ):      8.671 s ±  0.078 s    [User: 8.309 s, System: 0.392 s]
  Range (min … max):    8.594 s …  8.816 s
```

Differential Revision: https://phab.mercurial-scm.org/D9023
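The core idea is small enough to show outside of Mercurial. Below is a minimal,
standalone sketch of the pattern the patch introduces: a setter that invalidates
a per-maxrev digest cache whenever the filtered set changes, and a lookup that
memoizes the sha1 key instead of re-hashing on every call. The names
`FilteredLog` and `filtered_key` are illustrative only and are not part of
Mercurial's API; the real change is in the diff below.
```
import hashlib


class FilteredLog(object):
    """Holds a frozenset of filtered revisions plus a per-maxrev key cache."""

    def __init__(self):
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Any change to the filtered set invalidates every cached key.
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}


def filtered_key(cl, maxrev):
    """Return a digest of the filtered revs <= maxrev, memoized per maxrev."""
    if not cl.filteredrevs:
        return None
    key = cl._filteredrevs_hashcache.get(maxrev)
    if not key:
        revs = sorted(r for r in cl.filteredrevs if r <= maxrev)
        if revs:
            s = hashlib.sha1()
            for rev in revs:
                s.update(b'%d;' % rev)
            key = s.digest()
            cl._filteredrevs_hashcache[maxrev] = key
    return key


if __name__ == '__main__':
    cl = FilteredLog()
    cl.filteredrevs = frozenset({2, 5, 7})
    k1 = filtered_key(cl, 10)   # computes and caches the digest
    k2 = filtered_key(cl, 10)   # served from the cache, no re-hash
    assert k1 == k2
    cl.filteredrevs = frozenset({2, 5})  # setter clears the cache
```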
mercurial/changelog.py
mercurial/scmutil.py
--- a/mercurial/changelog.py	Mon Sep 21 15:05:38 2020 -0400
+++ b/mercurial/changelog.py	Wed Sep 16 12:13:46 2020 -0700
@@ -403,9 +403,21 @@
         self._delayed = False
         self._delaybuf = None
         self._divert = False
-        self.filteredrevs = frozenset()
+        self._filteredrevs = frozenset()
+        self._filteredrevs_hashcache = {}
         self._copiesstorage = opener.options.get(b'copies-storage')
 
+    @property
+    def filteredrevs(self):
+        return self._filteredrevs
+
+    @filteredrevs.setter
+    def filteredrevs(self, val):
+        # Ensure all updates go through this function
+        assert isinstance(val, frozenset)
+        self._filteredrevs = val
+        self._filteredrevs_hashcache = {}
+
     def delayupdate(self, tr):
         """delay visibility of index updates to other readers"""
 
--- a/mercurial/scmutil.py	Mon Sep 21 15:05:38 2020 -0400
+++ b/mercurial/scmutil.py	Wed Sep 16 12:13:46 2020 -0700
@@ -364,13 +364,15 @@
     cl = repo.changelog
     if not cl.filteredrevs:
         return None
-    key = None
-    revs = sorted(r for r in cl.filteredrevs if r <= maxrev)
-    if revs:
-        s = hashutil.sha1()
-        for rev in revs:
-            s.update(b'%d;' % rev)
-        key = s.digest()
+    key = cl._filteredrevs_hashcache.get(maxrev)
+    if not key:
+        revs = sorted(r for r in cl.filteredrevs if r <= maxrev)
+        if revs:
+            s = hashutil.sha1()
+            for rev in revs:
+                s.update(b'%d;' % rev)
+            key = s.digest()
+            cl._filteredrevs_hashcache[maxrev] = key
     return key