delta-find: add a `delta-reuse-policy` on configuration `path`
author Pierre-Yves David <pierre-yves.david@octobus.net>
Sat, 03 Dec 2022 01:24:34 +0100
changeset 49767 f1887500f3ec
parent 49766 152d9c011bcd
child 49768 bcae90c53def
delta-find: add a `delta-reuse-policy` on configuration `path`. That option allows controlling the behavior on a per-path basis, opening the way to treating pulls from central servers differently from other operations.
mercurial/bundle2.py
mercurial/helptext/config.txt
mercurial/utils/urlutil.py
tests/test-revlog-delta-find.t
--- a/mercurial/bundle2.py	Sat Dec 03 01:31:23 2022 +0100
+++ b/mercurial/bundle2.py	Sat Dec 03 01:24:34 2022 +0100
@@ -517,6 +517,10 @@
 
 
 def _processchangegroup(op, cg, tr, source, url, **kwargs):
+    if op.remote is not None and op.remote.path is not None:
+        remote_path = op.remote.path
+        kwargs = kwargs.copy()
+        kwargs['delta_base_reuse_policy'] = remote_path.delta_reuse_policy
     ret = cg.apply(op.repo, tr, source, url, **kwargs)
     op.records.add(
         b'changegroup',
--- a/mercurial/helptext/config.txt	Sat Dec 03 01:31:23 2022 +0100
+++ b/mercurial/helptext/config.txt	Sat Dec 03 01:24:34 2022 +0100
@@ -1922,6 +1922,35 @@
   - ``ignore``: ignore bookmarks during exchange.
     (This currently only affect pulling)
 
+.. container:: verbose
+
+  ``delta-reuse-policy``
+  Control the policy regarding deltas sent by the remote during pulls.
+
+  This is an advanced option that non-admin users should not need to understand
+  or set. This option can be used to speed up pulls from trusted central
+  servers, or to fix-up deltas from older servers.
+
+  It supports the following values:
+
+  - ``default``: use the policy defined by
+    `storage.revlog.reuse-external-delta-parent`.
+
+  - ``no-reuse``: start a new optimal delta search for each new revision we add
+    to the repository. The deltas from the server will be reused when the base
+    it applies to is tested (this can be frequent if that base is the one and
+    unique parent of that revision). This can significantly slow down pulls but
+    will result in an optimized storage space if the remote peer is sending poor
+    quality deltas.
+
+  - ``try-base``: try to reuse the deltas from the remote peer as long as they
+    create a valid delta-chain in the local repository. This speeds up the
+    unbundling process, but can result in sub-optimal storage space if the
+    remote peer is sending poor quality deltas.
+
+  See `hg help config.storage.revlog.reuse-external-delta-parent` for a similar
+  global option. That option defines the behavior of `default`.
+
 The following special named paths exist:
 
 ``default``
--- a/mercurial/utils/urlutil.py	Sat Dec 03 01:31:23 2022 +0100
+++ b/mercurial/utils/urlutil.py	Sat Dec 03 01:24:34 2022 +0100
@@ -24,6 +24,10 @@
     stringutil,
 )
 
+from ..revlogutils import (
+    constants as revlog_constants,
+)
+
 
 if pycompat.TYPE_CHECKING:
     from typing import (
@@ -767,6 +771,26 @@
     return value
 
 
+DELTA_REUSE_POLICIES = {
+    b'default': None,
+    b'try-base': revlog_constants.DELTA_BASE_REUSE_TRY,
+    b'no-reuse': revlog_constants.DELTA_BASE_REUSE_NO,
+}
+
+
+@pathsuboption(b'delta-reuse-policy', b'delta_reuse_policy')
+def delta_reuse_policy(ui, path, value):
+    if value not in DELTA_REUSE_POLICIES:
+        path_name = path.name
+        if path_name is None:
+            # this is an "anonymous" path, config comes from the global one
+            path_name = b'*'
+        msg = _(b'(paths.%s:delta-reuse-policy has unknown value: "%s")\n')
+        msg %= (path_name, value)
+        ui.warn(msg)
+    return DELTA_REUSE_POLICIES.get(value)
+
+
 @pathsuboption(b'multi-urls', b'multi_urls')
 def multiurls_pathoption(ui, path, value):
     res = stringutil.parsebool(value)
--- a/tests/test-revlog-delta-find.t	Sat Dec 03 01:31:23 2022 +0100
+++ b/tests/test-revlog-delta-find.t	Sat Dec 03 01:24:34 2022 +0100
@@ -191,3 +191,72 @@
   \s*1001 (re)
   $ hg -R bundle-reuse-disabled debugdata my-file.txt 1 | wc -l
   \s*1200 (re)
+
+
+Check the paths.*:delta-reuse-policy option
+===========================================
+
+Get a repository with the bad parent picked and a clone ready to pull the merge
+
+  $ cp -ar bundle-reuse-enabled peer-bad-delta
+  $ hg clone peer-bad-delta local-pre-pull --rev `cat large.node` --rev `cat small.node` --quiet
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=0: delta-base=0 * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=1: delta-base=0 * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=2: delta-base=0 * (glob)
+
+Check the parent order for the file
+
+  $ hg -R local-pre-pull debugdata my-file.txt 2 | wc -l
+  \s*1001 (re)
+  $ hg -R local-pre-pull debugdata my-file.txt 1 | wc -l
+  \s*1200 (re)
+
+Pull with no value (so the default)
+-----------------------------------
+
+default is to reuse the (bad) delta
+
+  $ cp -ar local-pre-pull local-no-value
+  $ hg -R local-no-value pull --quiet
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
+
+Pull with the default set explicitly
+------------------------------------
+
+default is to reuse the (bad) delta
+
+  $ cp -ar local-pre-pull local-default
+  $ hg -R local-default pull --quiet --config 'paths.default:delta-reuse-policy=default'
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)
+
+Pull with no-reuse
+------------------
+
+We don't reuse the base, so we get a better delta
+
+  $ cp -ar local-pre-pull local-no-reuse
+  $ hg -R local-no-reuse pull --quiet --config 'paths.default:delta-reuse-policy=no-reuse'
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=1 * (glob)
+
+Pull with try-base
+------------------
+
+We requested to use the (bad) delta
+
+  $ cp -ar local-pre-pull local-try-base
+  $ hg -R local-try-base pull --quiet --config 'paths.default:delta-reuse-policy=try-base'
+  DBG-DELTAS: CHANGELOG: * (glob)
+  DBG-DELTAS: MANIFESTLOG: * (glob)
+  DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob)