# HG changeset patch # User Pierre-Yves David # Date 1700452765 -3600 # Node ID a227e061bb4b7b994ca87398f9240677c7d48d0d # Parent c83074405276affc838fecd70f2150bd93143f05 delta-find: move `_rawgroups` on the `_DeltaSearch` object Moving more code before doing more logic changes. diff -r c83074405276 -r a227e061bb4b mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py Mon Nov 20 04:53:11 2023 +0100 +++ b/mercurial/revlogutils/deltas.py Mon Nov 20 04:59:25 2023 +0100 @@ -888,13 +888,7 @@ return if self.snapshot_cache is None: self.snapshot_cache = SnapshotCache() - groups = _rawgroups( - self.revlog, - self.p1, - self.p2, - self.cachedelta, - self.snapshot_cache, - ) + groups = self._raw_groups() for candidates in groups: good = yield candidates if good is not None: @@ -937,127 +931,133 @@ yield None + def _raw_groups(self): + """Provides group of revision to be tested as delta base -def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None): - """Provides group of revision to be tested as delta base + This lower level function focus on emitting delta theorically + interresting without looking it any practical details. - This lower level function focus on emitting delta theorically interresting - without looking it any practical details. + The group order aims at providing fast or small candidates first. + """ + # Why search for delta base if we cannot use a delta base ? + assert self.revlog.delta_config.general_delta + # also see issue6056 + sparse = self.revlog.delta_config.sparse_revlog + curr = len(self.revlog) + prev = curr - 1 + deltachain = lambda rev: self.revlog._deltachain(rev)[0] - The group order aims at providing fast or small candidates first. - """ - # Why search for delta base if we cannot use a delta base ? - assert revlog.delta_config.general_delta - # also see issue6056 - sparse = revlog.delta_config.sparse_revlog - curr = len(revlog) - prev = curr - 1 - deltachain = lambda rev: revlog._deltachain(rev)[0] + # exclude already lazy tested base if any + parents = [p for p in (self.p1, self.p2) if p != nullrev] - # exclude already lazy tested base if any - parents = [p for p in (p1, p2) if p != nullrev] + if ( + not self.revlog.delta_config.delta_both_parents + and len(parents) == 2 + ): + parents.sort() + # To minimize the chance of having to build a fulltext, + # pick first whichever parent is closest to us (max rev) + yield (parents[1],) + # then the other one (min rev) if the first did not fit + yield (parents[0],) + elif len(parents) > 0: + # Test all parents (1 or 2), and keep the best candidate + yield parents - if not revlog.delta_config.delta_both_parents and len(parents) == 2: - parents.sort() - # To minimize the chance of having to build a fulltext, - # pick first whichever parent is closest to us (max rev) - yield (parents[1],) - # then the other one (min rev) if the first did not fit - yield (parents[0],) - elif len(parents) > 0: - # Test all parents (1 or 2), and keep the best candidate - yield parents - - if sparse and parents: - if snapshot_cache is None: - # map: base-rev: [snapshot-revs] - snapshot_cache = SnapshotCache() - # See if we can use an existing snapshot in the parent chains to use as - # a base for a new intermediate-snapshot - # - # search for snapshot in parents delta chain - # map: snapshot-level: snapshot-rev - parents_snaps = collections.defaultdict(set) - candidate_chains = [deltachain(p) for p in parents] - for chain in candidate_chains: - for idx, s in enumerate(chain): - if not revlog.issnapshot(s): + if sparse and parents: + if self.snapshot_cache is None: + # map: base-rev: [snapshot-revs] + self.snapshot_cache = SnapshotCache() + # See if we can use an existing snapshot in the parent chains to + # use as a base for a new intermediate-snapshot + # + # search for snapshot in parents delta chain map: snapshot-level: + # snapshot-rev + parents_snaps = collections.defaultdict(set) + candidate_chains = [deltachain(p) for p in parents] + for chain in candidate_chains: + for idx, s in enumerate(chain): + if not self.revlog.issnapshot(s): + break + parents_snaps[idx].add(s) + snapfloor = min(parents_snaps[0]) + 1 + self.snapshot_cache.update(self.revlog, snapfloor) + # search for the highest "unrelated" revision + # + # Adding snapshots used by "unrelated" revision increase the odd we + # reuse an independant, yet better snapshot chain. + # + # XXX instead of building a set of revisions, we could lazily + # enumerate over the chains. That would be more efficient, however + # we stick to simple code for now. + all_revs = set() + for chain in candidate_chains: + all_revs.update(chain) + other = None + for r in self.revlog.revs(prev, snapfloor): + if r not in all_revs: + other = r break - parents_snaps[idx].add(s) - snapfloor = min(parents_snaps[0]) + 1 - snapshot_cache.update(revlog, snapfloor) - # search for the highest "unrelated" revision - # - # Adding snapshots used by "unrelated" revision increase the odd we - # reuse an independant, yet better snapshot chain. - # - # XXX instead of building a set of revisions, we could lazily enumerate - # over the chains. That would be more efficient, however we stick to - # simple code for now. - all_revs = set() - for chain in candidate_chains: - all_revs.update(chain) - other = None - for r in revlog.revs(prev, snapfloor): - if r not in all_revs: - other = r - break - if other is not None: - # To avoid unfair competition, we won't use unrelated intermediate - # snapshot that are deeper than the ones from the parent delta - # chain. - max_depth = max(parents_snaps.keys()) - chain = deltachain(other) - for depth, s in enumerate(chain): - if s < snapfloor: - continue - if max_depth < depth: - break - if not revlog.issnapshot(s): - break - parents_snaps[depth].add(s) - # Test them as possible intermediate snapshot base - # We test them from highest to lowest level. High level one are more - # likely to result in small delta - floor = None - for idx, snaps in sorted(parents_snaps.items(), reverse=True): - siblings = set() - for s in snaps: - siblings.update(snapshot_cache.snapshots[s]) - # Before considering making a new intermediate snapshot, we check - # if an existing snapshot, children of base we consider, would be - # suitable. + if other is not None: + # To avoid unfair competition, we won't use unrelated + # intermediate snapshot that are deeper than the ones from the + # parent delta chain. + max_depth = max(parents_snaps.keys()) + chain = deltachain(other) + for depth, s in enumerate(chain): + if s < snapfloor: + continue + if max_depth < depth: + break + if not self.revlog.issnapshot(s): + break + parents_snaps[depth].add(s) + # Test them as possible intermediate snapshot base We test them + # from highest to lowest level. High level one are more likely to + # result in small delta + floor = None + for idx, snaps in sorted(parents_snaps.items(), reverse=True): + siblings = set() + for s in snaps: + siblings.update(self.snapshot_cache.snapshots[s]) + # Before considering making a new intermediate snapshot, we + # check if an existing snapshot, children of base we consider, + # would be suitable. + # + # It give a change to reuse a delta chain "unrelated" to the + # current revision instead of starting our own. Without such + # re-use, topological branches would keep reopening new chains. + # Creating more and more snapshot as the repository grow. + + if floor is not None: + # We only do this for siblings created after the one in our + # parent's delta chain. Those created before has less + # chances to be valid base since our ancestors had to + # create a new snapshot. + siblings = [r for r in siblings if floor < r] + yield tuple(sorted(siblings)) + # then test the base from our parent's delta chain. + yield tuple(sorted(snaps)) + floor = min(snaps) + # No suitable base found in the parent chain, search if any full + # snapshots emitted since parent's base would be a suitable base + # for an intermediate snapshot. # - # It give a change to reuse a delta chain "unrelated" to the - # current revision instead of starting our own. Without such - # re-use, topological branches would keep reopening new chains. + # It give a chance to reuse a delta chain unrelated to the current + # revisions instead of starting our own. Without such re-use, + # topological branches would keep reopening new full chains. # Creating more and more snapshot as the repository grow. + full = [ + r + for r in self.snapshot_cache.snapshots[nullrev] + if snapfloor <= r + ] + yield tuple(sorted(full)) - if floor is not None: - # We only do this for siblings created after the one in our - # parent's delta chain. Those created before has less chances - # to be valid base since our ancestors had to create a new - # snapshot. - siblings = [r for r in siblings if floor < r] - yield tuple(sorted(siblings)) - # then test the base from our parent's delta chain. - yield tuple(sorted(snaps)) - floor = min(snaps) - # No suitable base found in the parent chain, search if any full - # snapshots emitted since parent's base would be a suitable base for an - # intermediate snapshot. - # - # It give a chance to reuse a delta chain unrelated to the current - # revisions instead of starting our own. Without such re-use, - # topological branches would keep reopening new full chains. Creating - # more and more snapshot as the repository grow. - full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r] - yield tuple(sorted(full)) - - if not sparse: - # other approach failed try against prev to hopefully save us a - # fulltext. - yield (prev,) + if not sparse: + # other approach failed try against prev to hopefully save us a + # fulltext. + yield (prev,) class SnapshotCache: