# HG changeset patch
# User Pierre-Yves David
# Date 1667764037 18000
# Node ID f5f113f1b0112a80e63ba587bf6b67c7522245bc
# Parent  54421ef8a423810d17eb71f47574ba21ae8db733
delta-find: add a way to control the number of bases tested at the same time

See the inline comment for details. The feature is currently disabled, but it
should be enabled by default to mitigate some existing pathological cases.

Also see the next changeset for details.

diff -r 54421ef8a423 -r f5f113f1b011 mercurial/configitems.py
--- a/mercurial/configitems.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/configitems.py	Sun Nov 06 14:47:17 2022 -0500
@@ -2042,6 +2042,11 @@
 )
 coreconfigitem(
     b'storage',
+    b'revlog.delta-parent-search.candidate-group-chunk-size',
+    default=0,
+)
+coreconfigitem(
+    b'storage',
     b'revlog.issue6528.fix-incoming',
     default=True,
 )
diff -r 54421ef8a423 -r f5f113f1b011 mercurial/helptext/config.txt
--- a/mercurial/helptext/config.txt	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/helptext/config.txt	Sun Nov 06 14:47:17 2022 -0500
@@ -2281,6 +2281,21 @@
     To fix affected revisions that already exist within the repository, one can
     use :hg:`debug-repair-issue-6528`.
 
+.. container:: verbose
+
+    ``revlog.delta-parent-search.candidate-group-chunk-size``
+        Tune the number of delta bases the storage will consider in the
+        same "round" of search. In some very rare cases, using a smaller value
+        might result in faster processing at the possible expense of storage
+        space, while using larger values might result in slower processing at the
+        possible benefit of storage space. A value of "0" means no limitation.
+
+        default: no limitation
+
+        It is unlikely that you will have to tune this configuration. If you
+        think you do, consider talking with the Mercurial developer community
+        about your repositories.
+
 ``revlog.optimize-delta-parent-choice``
     When storing a merge revision, both parents will be equally considered as
     a possible delta base. This results in better delta selection and improved
diff -r 54421ef8a423 -r f5f113f1b011 mercurial/localrepo.py
--- a/mercurial/localrepo.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/localrepo.py	Sun Nov 06 14:47:17 2022 -0500
@@ -1081,6 +1081,11 @@
         b'storage', b'revlog.optimize-delta-parent-choice'
     )
     options[b'deltabothparents'] = deltabothparents
+    dps_cgds = ui.configint(
+        b'storage',
+        b'revlog.delta-parent-search.candidate-group-chunk-size',
+    )
+    options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
     options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')
 
     issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
diff -r 54421ef8a423 -r f5f113f1b011 mercurial/revlog.py
--- a/mercurial/revlog.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/revlog.py	Sun Nov 06 14:47:17 2022 -0500
@@ -348,6 +348,7 @@
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
+        self._candidate_group_chunk_size = 0
         self._debug_delta = False
         self.index = None
         self._docket = None
@@ -422,6 +423,9 @@
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
+        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
+        if dps_cgds:
+            self._candidate_group_chunk_size = dps_cgds
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
diff -r 54421ef8a423 -r f5f113f1b011 mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/revlogutils/deltas.py	Sun Nov 06 14:47:17 2022 -0500
@@ -680,6 +680,7 @@
     good = None
 
     deltas_limit = textlen * LIMIT_DELTA2TEXT
+    group_chunk_size = revlog._candidate_group_chunk_size
 
     tested = {nullrev}
     candidates = _refinedgroups(
@@ -770,11 +771,30 @@
 
             group.append(rev)
         if group:
-            # XXX: in the sparse revlog case, group can become large,
-            #      impacting performances. Some bounding or slicing mecanism
-            #      would help to reduce this impact.
-            tested.update(group)
-            good = yield tuple(group)
+            # When the size of the candidate group is big, it can result in a
+            # quite significant performance impact. To reduce this, we can send
+            # the candidates in smaller batches until a new batch does not
+            # provide any improvement.
+            #
+            # This might reduce the overall efficiency of the compression in
+            # some corner cases, but it should also prevent very pathological
+            # cases (e.g. 20,000 candidates) from being an issue.
+            #
+            # XXX note that the ordering of the group becomes important, as it
+            # now impacts the final result. The current order is not tuned and
+            # can be improved.
+            if group_chunk_size == 0:
+                tested.update(group)
+                good = yield tuple(group)
+            else:
+                prev_good = good
+                for start in range(0, len(group), group_chunk_size):
+                    sub_group = group[start : start + group_chunk_size]
+                    tested.update(sub_group)
+                    good = yield tuple(sub_group)
+                    if prev_good == good:
+                        break
+
     yield None
 
 
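For readers who want to experiment with the new knob, the option lives in the
[storage] section of an hgrc file. The value below is only an example; the
patch itself ships with a default of 0, meaning no limitation:

  [storage]
  revlog.delta-parent-search.candidate-group-chunk-size = 10

The following standalone Python sketch (not part of the patch, all names are
hypothetical) illustrates the control flow added to deltas.py above: candidates
are evaluated in fixed-size chunks, and the search stops as soon as a whole
chunk fails to improve on the best result seen so far. The `score` callback and
the sample data stand in for the real delta-size evaluation done by the revlog
code; only the batching and early-stop logic mirrors the change.

def best_base(candidates, chunk_size, score):
    """Return the candidate with the lowest score, testing chunk by chunk.

    A chunk_size of 0 means "no limitation": every candidate is tested in a
    single round. Otherwise the search stops once a whole chunk brings no
    improvement, mirroring the `prev_good == good` check in the patch.
    """
    if chunk_size == 0:
        chunk_size = len(candidates) or 1
    best = None
    best_score = None
    for start in range(0, len(candidates), chunk_size):
        prev_best = best
        for cand in candidates[start : start + chunk_size]:
            s = score(cand)
            if best_score is None or s < best_score:
                best, best_score = cand, s
        if prev_best == best:
            # this chunk brought no improvement: stop searching early
            break
    return best


if __name__ == '__main__':
    # toy scoring: the candidate value itself, lower is better
    print(best_base([40, 12, 7, 30, 55, 3, 9], chunk_size=3, score=lambda c: c))

As in the patch, a small chunk size trades some compression opportunities
(candidates in later chunks are never examined once a chunk stalls) for a bound
on how much work a single search round can do.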