delta-find: move the base of the delta search in its own function
authorPierre-Yves David <pierre-yves.david@octobus.net>
Thu, 23 Nov 2023 22:29:02 +0100
changeset 51355 fac6038b11f5
parent 51354 94fe4474b053
child 51356 701caeabbee7
delta-find: move the base of the delta search in its own function That logic is complicated enough that is is worth puting in its own function. Another method will be introduced in the next changeset to deal with the actual refining.
mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py	Thu Nov 23 21:44:51 2023 +0100
+++ b/mercurial/revlogutils/deltas.py	Thu Nov 23 22:29:02 2023 +0100
@@ -1080,6 +1080,99 @@
         self.current_stage = _STAGE_PREV
         yield (self.target_rev - 1,)
 
+    def _iter_snapshots_base(self):
+        assert self.revlog.delta_config.sparse_revlog
+        prev = self.target_rev - 1
+        deltachain = lambda rev: self.revlog._deltachain(rev)[0]
+
+        parents = [p for p in (self.p1, self.p2) if p != nullrev]
+        if not parents:
+            return
+        self.current_stage = _STAGE_SNAPSHOT
+        # See if we can use an existing snapshot in the parent chains to
+        # use as a base for a new intermediate-snapshot
+        #
+        # search for snapshot in parents delta chain map: snapshot-level:
+        # snapshot-rev
+        parents_snaps = collections.defaultdict(set)
+        candidate_chains = [deltachain(p) for p in parents]
+        for chain in candidate_chains:
+            for idx, s in enumerate(chain):
+                if not self.revlog.issnapshot(s):
+                    break
+                parents_snaps[idx].add(s)
+        snapfloor = min(parents_snaps[0]) + 1
+        self.snapshot_cache.update(self.revlog, snapfloor)
+        # search for the highest "unrelated" revision
+        #
+        # Adding snapshots used by "unrelated" revision increase the odd we
+        # reuse an independant, yet better snapshot chain.
+        #
+        # XXX instead of building a set of revisions, we could lazily
+        # enumerate over the chains. That would be more efficient, however
+        # we stick to simple code for now.
+        all_revs = set()
+        for chain in candidate_chains:
+            all_revs.update(chain)
+        other = None
+        for r in self.revlog.revs(prev, snapfloor):
+            if r not in all_revs:
+                other = r
+                break
+        if other is not None:
+            # To avoid unfair competition, we won't use unrelated
+            # intermediate snapshot that are deeper than the ones from the
+            # parent delta chain.
+            max_depth = max(parents_snaps.keys())
+            chain = deltachain(other)
+            for depth, s in enumerate(chain):
+                if s < snapfloor:
+                    continue
+                if max_depth < depth:
+                    break
+                if not self.revlog.issnapshot(s):
+                    break
+                parents_snaps[depth].add(s)
+        # Test them as possible intermediate snapshot base We test them
+        # from highest to lowest level. High level one are more likely to
+        # result in small delta
+        floor = None
+        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
+            siblings = set()
+            for s in snaps:
+                siblings.update(self.snapshot_cache.snapshots[s])
+            # Before considering making a new intermediate snapshot, we
+            # check if an existing snapshot, children of base we consider,
+            # would be suitable.
+            #
+            # It give a change to reuse a delta chain "unrelated" to the
+            # current revision instead of starting our own. Without such
+            # re-use, topological branches would keep reopening new chains.
+            # Creating more and more snapshot as the repository grow.
+
+            if floor is not None:
+                # We only do this for siblings created after the one in our
+                # parent's delta chain. Those created before has less
+                # chances to be valid base since our ancestors had to
+                # create a new snapshot.
+                siblings = [r for r in siblings if floor < r]
+            yield tuple(sorted(siblings))
+            # then test the base from our parent's delta chain.
+            yield tuple(sorted(snaps))
+            floor = min(snaps)
+        # No suitable base found in the parent chain, search if any full
+        # snapshots emitted since parent's base would be a suitable base
+        # for an intermediate snapshot.
+        #
+        # It give a chance to reuse a delta chain unrelated to the current
+        # revisions instead of starting our own. Without such re-use,
+        # topological branches would keep reopening new full chains.
+        # Creating more and more snapshot as the repository grow.
+        full = [
+            r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
+        ]
+        yield tuple(sorted(full))
+
     def _refined_groups(self):
         good = None
         groups = self._raw_groups()
@@ -1133,100 +1226,9 @@
         The group order aims at providing fast or small candidates first.
         """
         yield from self._iter_parents()
-        sparse = self.revlog.delta_config.sparse_revlog
-        prev = self.target_rev - 1
-        deltachain = lambda rev: self.revlog._deltachain(rev)[0]
-
-        parents = [p for p in (self.p1, self.p2) if p != nullrev]
-        if sparse and parents:
-            self.current_stage = _STAGE_SNAPSHOT
-            # See if we can use an existing snapshot in the parent chains to
-            # use as a base for a new intermediate-snapshot
-            #
-            # search for snapshot in parents delta chain map: snapshot-level:
-            # snapshot-rev
-            parents_snaps = collections.defaultdict(set)
-            candidate_chains = [deltachain(p) for p in parents]
-            for chain in candidate_chains:
-                for idx, s in enumerate(chain):
-                    if not self.revlog.issnapshot(s):
-                        break
-                    parents_snaps[idx].add(s)
-            snapfloor = min(parents_snaps[0]) + 1
-            self.snapshot_cache.update(self.revlog, snapfloor)
-            # search for the highest "unrelated" revision
-            #
-            # Adding snapshots used by "unrelated" revision increase the odd we
-            # reuse an independant, yet better snapshot chain.
-            #
-            # XXX instead of building a set of revisions, we could lazily
-            # enumerate over the chains. That would be more efficient, however
-            # we stick to simple code for now.
-            all_revs = set()
-            for chain in candidate_chains:
-                all_revs.update(chain)
-            other = None
-            for r in self.revlog.revs(prev, snapfloor):
-                if r not in all_revs:
-                    other = r
-                    break
-            if other is not None:
-                # To avoid unfair competition, we won't use unrelated
-                # intermediate snapshot that are deeper than the ones from the
-                # parent delta chain.
-                max_depth = max(parents_snaps.keys())
-                chain = deltachain(other)
-                for depth, s in enumerate(chain):
-                    if s < snapfloor:
-                        continue
-                    if max_depth < depth:
-                        break
-                    if not self.revlog.issnapshot(s):
-                        break
-                    parents_snaps[depth].add(s)
-            # Test them as possible intermediate snapshot base We test them
-            # from highest to lowest level. High level one are more likely to
-            # result in small delta
-            floor = None
-            for idx, snaps in sorted(parents_snaps.items(), reverse=True):
-                siblings = set()
-                for s in snaps:
-                    siblings.update(self.snapshot_cache.snapshots[s])
-                # Before considering making a new intermediate snapshot, we
-                # check if an existing snapshot, children of base we consider,
-                # would be suitable.
-                #
-                # It give a change to reuse a delta chain "unrelated" to the
-                # current revision instead of starting our own. Without such
-                # re-use, topological branches would keep reopening new chains.
-                # Creating more and more snapshot as the repository grow.
-
-                if floor is not None:
-                    # We only do this for siblings created after the one in our
-                    # parent's delta chain. Those created before has less
-                    # chances to be valid base since our ancestors had to
-                    # create a new snapshot.
-                    siblings = [r for r in siblings if floor < r]
-                yield tuple(sorted(siblings))
-                # then test the base from our parent's delta chain.
-                yield tuple(sorted(snaps))
-                floor = min(snaps)
-            # No suitable base found in the parent chain, search if any full
-            # snapshots emitted since parent's base would be a suitable base
-            # for an intermediate snapshot.
-            #
-            # It give a chance to reuse a delta chain unrelated to the current
-            # revisions instead of starting our own. Without such re-use,
-            # topological branches would keep reopening new full chains.
-            # Creating more and more snapshot as the repository grow.
-            full = [
-                r
-                for r in self.snapshot_cache.snapshots[nullrev]
-                if snapfloor <= r
-            ]
-            yield tuple(sorted(full))
-
-        if not sparse:
+        if self.revlog.delta_config.sparse_revlog:
+            yield from self._iter_snapshots_base()
+        else:
             yield from self._iter_prev()