changeset 41885:5baf06d2bb41

discovery: cache the children mapping used during each discovery During discovery, the `undecided` set keep shrinking. Therefore, the map computed for an iteration N will be valid for iteration N+1. Instead of computing the same data over and over we cache it the first time. Our private pathological case speed up from about 7.5 seconds to about 6.3 seconds. (starting from over 70s at the start of the full series)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Thu, 28 Feb 2019 01:48:20 +0100
parents d5e6ae6e8012
children a05f0bbefdd9
files mercurial/setdiscovery.py
diffstat 1 files changed, 5 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/setdiscovery.py	Thu Feb 28 01:15:45 2019 +0100
+++ b/mercurial/setdiscovery.py	Thu Feb 28 01:48:20 2019 +0100
@@ -116,6 +116,7 @@
         self._common = repo.changelog.incrementalmissingrevs()
         self._undecided = None
         self.missing = set()
+        self._childrenmap = None
 
     def addcommons(self, commons):
         """registrer nodes known as common"""
@@ -173,15 +174,14 @@
 
     def _childrengetter(self, revs):
 
+        if self._childrenmap is not None:
+            return self._childrenmap.__getitem__
+
         # _updatesample() essentially does interaction over revisions to look
         # up their children. This lookup is expensive and doing it in a loop is
         # quadratic. We precompute the children for all relevant revisions and
         # make the lookup in _updatesample() a simple dict lookup.
-        #
-        # Because this function can be called multiple times during discovery,
-        # we may still perform redundant work and there is room to optimize
-        # this by keeping a persistent cache of children across invocations.
-        children = {}
+        self._childrenmap = children = {}
 
         parentrevs = self._parentsgetter()