Mercurial > hg-stable

--- a/mercurial/configitems.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/configitems.py	Sun Nov 06 14:47:17 2022 -0500
@@ -2042,6 +2042,11 @@
 )
 coreconfigitem(
     b'storage',
+    b'revlog.delta-parent-search.candidate-group-chunk-size',
+    default=0,
+)
+coreconfigitem(
+    b'storage',
     b'revlog.issue6528.fix-incoming',
     default=True,
 )
--- a/mercurial/helptext/config.txt	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/helptext/config.txt	Sun Nov 06 14:47:17 2022 -0500
@@ -2281,6 +2281,21 @@
     To fix affected revisions that already exist within the repository, one can
     use :hg:`debug-repair-issue-6528`.

+.. container:: verbose
+
+    ``revlog.delta-parent-search.candidate-group-chunk-size``
+        Tune the number of delta bases the storage will consider in the
+        same "round" of search. In some very rare cases, using a smaller value
+        might result in faster processing at the possible expense of storage
+        space, while using larger values might result in slower processing at the
+        possible benefit of storage space. A value of "0" means no limitation.
+
+        default: no limitation
+
+        It is unlikely that you'll have to tune this configuration. If you think
+        you do, consider talking with the Mercurial developer community about your
+        repositories.
+
 ``revlog.optimize-delta-parent-choice``
     When storing a merge revision, both parents will be equally considered as
     a possible delta base. This results in better delta selection and improved
--- a/mercurial/localrepo.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/localrepo.py	Sun Nov 06 14:47:17 2022 -0500
@@ -1081,6 +1081,11 @@
         b'storage', b'revlog.optimize-delta-parent-choice'
     )
     options[b'deltabothparents'] = deltabothparents
+    dps_cgds = ui.configint(
+        b'storage',
+        b'revlog.delta-parent-search.candidate-group-chunk-size',
+    )
+    options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
     options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')

     issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
--- a/mercurial/revlog.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/revlog.py	Sun Nov 06 14:47:17 2022 -0500
@@ -348,6 +348,7 @@
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
+        self._candidate_group_chunk_size = 0
         self._debug_delta = False
         self.index = None
         self._docket = None
@@ -422,6 +423,9 @@
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
+        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
+        if dps_cgds:
+            self._candidate_group_chunk_size = dps_cgds
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
--- a/mercurial/revlogutils/deltas.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/revlogutils/deltas.py	Sun Nov 06 14:47:17 2022 -0500
@@ -680,6 +680,7 @@
     good = None

     deltas_limit = textlen * LIMIT_DELTA2TEXT
+    group_chunk_size = revlog._candidate_group_chunk_size

     tested = {nullrev}
     candidates = _refinedgroups(
@@ -770,11 +771,30 @@

             group.append(rev)
         if group:
-            # XXX: in the sparse revlog case, group can become large,
-            #      impacting performances. Some bounding or slicing mecanism
-            #      would help to reduce this impact.
-            tested.update(group)
-            good = yield tuple(group)
+            # When the size of the candidate group is big, it can result in a
+            # quite significant performance impact. To reduce this, we can send
+            # them in smaller batches until the new batch does not provide any
+            # improvements.
+            #
+            # This might reduce the overall efficiency of the compression in
+            # some corner cases, but that should also prevent very pathological
+            # cases from being an issue (e.g. 20 000 candidates).
+            #
+            # XXX note that the ordering of the group becomes important as it
+            # now impacts the final result. The current order is unprocessed
+            # and can be improved.
+            if group_chunk_size == 0:
+                tested.update(group)
+                good = yield tuple(group)
+            else:
+                prev_good = good
+                for start in range(0, len(group), group_chunk_size):
+                    sub_group = group[start : start + group_chunk_size]
+                    tested.update(sub_group)
+                    good = yield tuple(sub_group)
+                    if prev_good == good:
+                        break
+
     yield None