delta-find: move `_rawgroups` on the `_DeltaSearch` object
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 20 Nov 2023 04:59:25 +0100
changeset 51322 a227e061bb4b
parent 51321 c83074405276
child 51323 87fce139e927
delta-find: move `_rawgroups` on the `_DeltaSearch` object Moving more code before doing more logic changes.
mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py	Mon Nov 20 04:53:11 2023 +0100
+++ b/mercurial/revlogutils/deltas.py	Mon Nov 20 04:59:25 2023 +0100
@@ -888,13 +888,7 @@
                 return
         if self.snapshot_cache is None:
             self.snapshot_cache = SnapshotCache()
-        groups = _rawgroups(
-            self.revlog,
-            self.p1,
-            self.p2,
-            self.cachedelta,
-            self.snapshot_cache,
-        )
+        groups = self._raw_groups()
         for candidates in groups:
             good = yield candidates
             if good is not None:
@@ -937,127 +931,133 @@
 
         yield None
 
+    def _raw_groups(self):
+        """Provides group of revision to be tested as delta base
 
-def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
-    """Provides group of revision to be tested as delta base
+        This lower level function focus on emitting delta theorically
+        interresting without looking it any practical details.
 
-    This lower level function focus on emitting delta theorically interresting
-    without looking it any practical details.
+        The group order aims at providing fast or small candidates first.
+        """
+        # Why search for delta base if we cannot use a delta base ?
+        assert self.revlog.delta_config.general_delta
+        # also see issue6056
+        sparse = self.revlog.delta_config.sparse_revlog
+        curr = len(self.revlog)
+        prev = curr - 1
+        deltachain = lambda rev: self.revlog._deltachain(rev)[0]
 
-    The group order aims at providing fast or small candidates first.
-    """
-    # Why search for delta base if we cannot use a delta base ?
-    assert revlog.delta_config.general_delta
-    # also see issue6056
-    sparse = revlog.delta_config.sparse_revlog
-    curr = len(revlog)
-    prev = curr - 1
-    deltachain = lambda rev: revlog._deltachain(rev)[0]
+        # exclude already lazy tested base if any
+        parents = [p for p in (self.p1, self.p2) if p != nullrev]
 
-    # exclude already lazy tested base if any
-    parents = [p for p in (p1, p2) if p != nullrev]
+        if (
+            not self.revlog.delta_config.delta_both_parents
+            and len(parents) == 2
+        ):
+            parents.sort()
+            # To minimize the chance of having to build a fulltext,
+            # pick first whichever parent is closest to us (max rev)
+            yield (parents[1],)
+            # then the other one (min rev) if the first did not fit
+            yield (parents[0],)
+        elif len(parents) > 0:
+            # Test all parents (1 or 2), and keep the best candidate
+            yield parents
 
-    if not revlog.delta_config.delta_both_parents and len(parents) == 2:
-        parents.sort()
-        # To minimize the chance of having to build a fulltext,
-        # pick first whichever parent is closest to us (max rev)
-        yield (parents[1],)
-        # then the other one (min rev) if the first did not fit
-        yield (parents[0],)
-    elif len(parents) > 0:
-        # Test all parents (1 or 2), and keep the best candidate
-        yield parents
-
-    if sparse and parents:
-        if snapshot_cache is None:
-            # map: base-rev: [snapshot-revs]
-            snapshot_cache = SnapshotCache()
-        # See if we can use an existing snapshot in the parent chains to use as
-        # a base for a new intermediate-snapshot
-        #
-        # search for snapshot in parents delta chain
-        # map: snapshot-level: snapshot-rev
-        parents_snaps = collections.defaultdict(set)
-        candidate_chains = [deltachain(p) for p in parents]
-        for chain in candidate_chains:
-            for idx, s in enumerate(chain):
-                if not revlog.issnapshot(s):
+        if sparse and parents:
+            if self.snapshot_cache is None:
+                # map: base-rev: [snapshot-revs]
+                self.snapshot_cache = SnapshotCache()
+            # See if we can use an existing snapshot in the parent chains to
+            # use as a base for a new intermediate-snapshot
+            #
+            # search for snapshot in parents delta chain map: snapshot-level:
+            # snapshot-rev
+            parents_snaps = collections.defaultdict(set)
+            candidate_chains = [deltachain(p) for p in parents]
+            for chain in candidate_chains:
+                for idx, s in enumerate(chain):
+                    if not self.revlog.issnapshot(s):
+                        break
+                    parents_snaps[idx].add(s)
+            snapfloor = min(parents_snaps[0]) + 1
+            self.snapshot_cache.update(self.revlog, snapfloor)
+            # search for the highest "unrelated" revision
+            #
+            # Adding snapshots used by "unrelated" revision increase the odd we
+            # reuse an independant, yet better snapshot chain.
+            #
+            # XXX instead of building a set of revisions, we could lazily
+            # enumerate over the chains. That would be more efficient, however
+            # we stick to simple code for now.
+            all_revs = set()
+            for chain in candidate_chains:
+                all_revs.update(chain)
+            other = None
+            for r in self.revlog.revs(prev, snapfloor):
+                if r not in all_revs:
+                    other = r
                     break
-                parents_snaps[idx].add(s)
-        snapfloor = min(parents_snaps[0]) + 1
-        snapshot_cache.update(revlog, snapfloor)
-        # search for the highest "unrelated" revision
-        #
-        # Adding snapshots used by "unrelated" revision increase the odd we
-        # reuse an independant, yet better snapshot chain.
-        #
-        # XXX instead of building a set of revisions, we could lazily enumerate
-        # over the chains. That would be more efficient, however we stick to
-        # simple code for now.
-        all_revs = set()
-        for chain in candidate_chains:
-            all_revs.update(chain)
-        other = None
-        for r in revlog.revs(prev, snapfloor):
-            if r not in all_revs:
-                other = r
-                break
-        if other is not None:
-            # To avoid unfair competition, we won't use unrelated intermediate
-            # snapshot that are deeper than the ones from the parent delta
-            # chain.
-            max_depth = max(parents_snaps.keys())
-            chain = deltachain(other)
-            for depth, s in enumerate(chain):
-                if s < snapfloor:
-                    continue
-                if max_depth < depth:
-                    break
-                if not revlog.issnapshot(s):
-                    break
-                parents_snaps[depth].add(s)
-        # Test them as possible intermediate snapshot base
-        # We test them from highest to lowest level. High level one are more
-        # likely to result in small delta
-        floor = None
-        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
-            siblings = set()
-            for s in snaps:
-                siblings.update(snapshot_cache.snapshots[s])
-            # Before considering making a new intermediate snapshot, we check
-            # if an existing snapshot, children of base we consider, would be
-            # suitable.
+            if other is not None:
+                # To avoid unfair competition, we won't use unrelated
+                # intermediate snapshot that are deeper than the ones from the
+                # parent delta chain.
+                max_depth = max(parents_snaps.keys())
+                chain = deltachain(other)
+                for depth, s in enumerate(chain):
+                    if s < snapfloor:
+                        continue
+                    if max_depth < depth:
+                        break
+                    if not self.revlog.issnapshot(s):
+                        break
+                    parents_snaps[depth].add(s)
+            # Test them as possible intermediate snapshot base We test them
+            # from highest to lowest level. High level one are more likely to
+            # result in small delta
+            floor = None
+            for idx, snaps in sorted(parents_snaps.items(), reverse=True):
+                siblings = set()
+                for s in snaps:
+                    siblings.update(self.snapshot_cache.snapshots[s])
+                # Before considering making a new intermediate snapshot, we
+                # check if an existing snapshot, children of base we consider,
+                # would be suitable.
+                #
+                # It give a change to reuse a delta chain "unrelated" to the
+                # current revision instead of starting our own. Without such
+                # re-use, topological branches would keep reopening new chains.
+                # Creating more and more snapshot as the repository grow.
+
+                if floor is not None:
+                    # We only do this for siblings created after the one in our
+                    # parent's delta chain. Those created before has less
+                    # chances to be valid base since our ancestors had to
+                    # create a new snapshot.
+                    siblings = [r for r in siblings if floor < r]
+                yield tuple(sorted(siblings))
+                # then test the base from our parent's delta chain.
+                yield tuple(sorted(snaps))
+                floor = min(snaps)
+            # No suitable base found in the parent chain, search if any full
+            # snapshots emitted since parent's base would be a suitable base
+            # for an intermediate snapshot.
             #
-            # It give a change to reuse a delta chain "unrelated" to the
-            # current revision instead of starting our own. Without such
-            # re-use, topological branches would keep reopening new chains.
+            # It give a chance to reuse a delta chain unrelated to the current
+            # revisions instead of starting our own. Without such re-use,
+            # topological branches would keep reopening new full chains.
             # Creating more and more snapshot as the repository grow.
+            full = [
+                r
+                for r in self.snapshot_cache.snapshots[nullrev]
+                if snapfloor <= r
+            ]
+            yield tuple(sorted(full))
 
-            if floor is not None:
-                # We only do this for siblings created after the one in our
-                # parent's delta chain. Those created before has less chances
-                # to be valid base since our ancestors had to create a new
-                # snapshot.
-                siblings = [r for r in siblings if floor < r]
-            yield tuple(sorted(siblings))
-            # then test the base from our parent's delta chain.
-            yield tuple(sorted(snaps))
-            floor = min(snaps)
-        # No suitable base found in the parent chain, search if any full
-        # snapshots emitted since parent's base would be a suitable base for an
-        # intermediate snapshot.
-        #
-        # It give a chance to reuse a delta chain unrelated to the current
-        # revisions instead of starting our own. Without such re-use,
-        # topological branches would keep reopening new full chains. Creating
-        # more and more snapshot as the repository grow.
-        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
-        yield tuple(sorted(full))
-
-    if not sparse:
-        # other approach failed try against prev to hopefully save us a
-        # fulltext.
-        yield (prev,)
+        if not sparse:
+            # other approach failed try against prev to hopefully save us a
+            # fulltext.
+            yield (prev,)
 
 
 class SnapshotCache: