# HG changeset patch # User Pierre-Yves David # Date 1667878230 18000 # Node ID bcae90c53defff8253ce78290a2dbd9e35af9a89 # Parent f1887500f3ec151a317d6e3b64ccf69e5a5810a5 delta-find: add a delta-reuse policy that blindly accepts incoming deltas When this policy is set, incoming deltas are blindly accepted without regard for the validity of the chain they build. diff -r f1887500f3ec -r bcae90c53def mercurial/helptext/config.txt --- a/mercurial/helptext/config.txt Sat Dec 03 01:24:34 2022 +0100 +++ b/mercurial/helptext/config.txt Mon Nov 07 22:30:30 2022 -0500 @@ -1948,6 +1948,13 @@ unbundling process, but can result in sub-optimal storage space if the remote peer is sending poor quality deltas. + - ``forced``: the deltas from the peer will be reused in all cases, even if + the resulting delta-chain is "invalid". This setting will ensure the bundle + is applied at minimal CPU cost, but it can result in longer delta chains + being created on the client, making revisions potentially slower to access + in the future. If you think you need this option, you should make sure you + are also talking to the Mercurial developer community to get confirmation. + See `hg help config.storage.revlog.reuse-external-delta-parent` for a similar global option. That option defines the behavior of `default`. diff -r f1887500f3ec -r bcae90c53def mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py Sat Dec 03 01:24:34 2022 +0100 +++ b/mercurial/revlogutils/constants.py Mon Nov 07 22:30:30 2022 -0500 @@ -315,3 +315,4 @@ # The delta base will be tested for validy first. So that the cached deltas get # used when possible. 
DELTA_BASE_REUSE_TRY = 1 +DELTA_BASE_REUSE_FORCE = 2 diff -r f1887500f3ec -r bcae90c53def mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py Sat Dec 03 01:24:34 2022 +0100 +++ b/mercurial/revlogutils/deltas.py Mon Nov 07 22:30:30 2022 -0500 @@ -20,6 +20,7 @@ COMP_MODE_DEFAULT, COMP_MODE_INLINE, COMP_MODE_PLAIN, + DELTA_BASE_REUSE_FORCE, DELTA_BASE_REUSE_NO, KIND_CHANGELOG, KIND_FILELOG, @@ -584,6 +585,13 @@ if deltainfo is None: return False + if ( + revinfo.cachedelta is not None + and deltainfo.base == revinfo.cachedelta[0] + and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE + ): + return True + # - 'deltainfo.distance' is the distance from the base revision -- # bounding it limits the amount of I/O we need to do. # - 'deltainfo.compresseddeltalen' is the sum of the total size of @@ -711,6 +719,16 @@ # filter out revision we tested already if rev in tested: continue + + if ( + cachedelta is not None + and rev == cachedelta[0] + and cachedelta[2] == DELTA_BASE_REUSE_FORCE + ): + # instructions are to forcibly consider/use this delta base + group.append(rev) + continue + # an higher authority deamed the base unworthy (e.g. 
censored) if excluded_bases is not None and rev in excluded_bases: tested.add(rev) diff -r f1887500f3ec -r bcae90c53def mercurial/utils/urlutil.py --- a/mercurial/utils/urlutil.py Sat Dec 03 01:24:34 2022 +0100 +++ b/mercurial/utils/urlutil.py Mon Nov 07 22:30:30 2022 -0500 @@ -775,6 +775,7 @@ b'default': None, b'try-base': revlog_constants.DELTA_BASE_REUSE_TRY, b'no-reuse': revlog_constants.DELTA_BASE_REUSE_NO, + b'forced': revlog_constants.DELTA_BASE_REUSE_FORCE, } diff -r f1887500f3ec -r bcae90c53def tests/test-revlog-delta-find.t --- a/tests/test-revlog-delta-find.t Sat Dec 03 01:24:34 2022 +0100 +++ b/tests/test-revlog-delta-find.t Mon Nov 07 22:30:30 2022 -0500 @@ -260,3 +260,57 @@ DBG-DELTAS: CHANGELOG: * (glob) DBG-DELTAS: MANIFESTLOG: * (glob) DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob) + +Case where we force a "bad" delta to be applied +=============================================== + +We build a very different file content to force a full snapshot + + $ cp -ar peer-bad-delta peer-bad-delta-with-full + $ cp -ar local-pre-pull local-pre-pull-full + $ echo '[paths]' >> local-pre-pull-full/.hg/hgrc + $ echo 'default=../peer-bad-delta-with-full' >> local-pre-pull-full/.hg/hgrc + + $ hg -R peer-bad-delta-with-full update 'desc("merge")' --quiet + $ ($TESTDIR/seq.py 2000 2100; $TESTDIR/seq.py 500 510; $TESTDIR/seq.py 3000 3050) \ + > | $PYTHON $TESTTMP/sha256line.py > peer-bad-delta-with-full/my-file.txt + $ hg -R peer-bad-delta-with-full commit -m 'trigger-full' + DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=4 * (glob) + DBG-DELTAS: MANIFESTLOG: * (glob) + DBG-DELTAS: CHANGELOG: * (glob) + +Check that "try-base" behavior challenges the delta +--------------------------------------------------- + +The bundling process creates a delta against the previous revision, however this +is an invalid chain for the client, so it is not considered and we do a full +snapshot again. 
+ + $ cp -ar local-pre-pull-full local-try-base-full + $ hg -R local-try-base-full pull --quiet \ + > --config 'paths.default:delta-reuse-policy=try-base' + DBG-DELTAS: CHANGELOG: * (glob) + DBG-DELTAS: CHANGELOG: * (glob) + DBG-DELTAS: MANIFESTLOG: * (glob) + DBG-DELTAS: MANIFESTLOG: * (glob) + DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob) + DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=4 * (glob) + +Check that "forced" behavior does not challenge the delta, even if it is bad. +----------------------------------------------------------------------------- + +The client does not challenge anything and applies the bizarre delta directly. + +Note: If the bundling process becomes smarter, this test might no longer work +(as the server won't be sending "bad" deltas anymore) and might need something +more subtle to test this behavior. + + $ cp -ar local-pre-pull-full local-forced-full + $ hg -R local-forced-full pull --quiet \ + > --config 'paths.default:delta-reuse-policy=forced' + DBG-DELTAS: CHANGELOG: * (glob) + DBG-DELTAS: CHANGELOG: * (glob) + DBG-DELTAS: MANIFESTLOG: * (glob) + DBG-DELTAS: MANIFESTLOG: * (glob) + DBG-DELTAS: FILELOG:my-file.txt: rev=3: delta-base=2 * (glob) + DBG-DELTAS: FILELOG:my-file.txt: rev=4: delta-base=3 * (glob)