# HG changeset patch # User Valentin Gatien-Baron , Pierre-Yves David # Date 1556224214 -7200 # Node ID a0b26fc8fbba272ad9f9788fe8c3ec05c024ad1e # Parent bc4373babd04359c9b4a6488d4fde2b89fbbbc59 deltas: skip if projected delta size does not match text size constraint Before computing any delta, we get a basic estimation of the delta size we can expect and the resulted compressed value. We then checks this projected size against the ½ⁿ size constraints. This allows to exclude potential base candidates before doing any expensive computation. This only apply to the intermediate-snapshot case since this constraint only apply to them. In practice we only perform this new checks for the manifestlog. Manifest log combine two property: it is likely to have delta chain issue and its diffing/compression is fairly predictable. The initial author of this changeset is Valentin Gatien-Baron providing the initial idea and initial testing, Pierre-Yves David later consolidated the code in the right location and run more extensive testing. diff -r bc4373babd04 -r a0b26fc8fbba mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py Fri Apr 26 00:28:22 2019 +0200 +++ b/mercurial/revlogutils/deltas.py Thu Apr 25 22:30:14 2019 +0200 @@ -679,6 +679,25 @@ # if chain already have too much data, skip base if deltas_limit < chainsize: continue + if sparse and revlog.upperboundcomp is not None: + maxcomp = revlog.upperboundcomp + basenotsnap = (p1, p2, nullrev) + if rev not in basenotsnap and revlog.issnapshot(rev): + snapshotdepth = revlog.snapshotdepth(rev) + # If text is significantly larger than the base, we can + # expect the resulting delta to be proportional to the size + # difference + revsize = revlog.rawsize(rev) + rawsizedistance = max(textlen - revsize, 0) + # use an estimate of the compression upper bound. + lowestrealisticdeltalen = rawsizedistance // maxcomp + + # check the absolute constraint on the delta size + snapshotlimit = textlen >> snapshotdepth + if snapshotlimit < lowestrealisticdeltalen: + # delta lower bound is larger than accepted upper bound + continue + group.append(rev) if group: # XXX: in the sparse revlog case, group can become large,