deltas: add a debug-delta-find command to analyse delta search
See command documentation for details.
For some reason, pytype is confused by our usage of None/deltainfo variable, so
I had to quiet it.
--- a/mercurial/debugcommands.py Thu May 19 23:39:42 2022 +0100
+++ b/mercurial/debugcommands.py Fri May 20 14:27:46 2022 +0200
@@ -73,6 +73,7 @@
repoview,
requirements,
revlog,
+ revlogutils,
revset,
revsetlang,
scmutil,
@@ -989,6 +990,65 @@
@command(
+ b'debug-delta-find',
+ cmdutil.debugrevlogopts + cmdutil.formatteropts,
+ _(b'-c|-m|FILE REV'),
+ optionalrepo=True,
+)
+def debugdeltafind(ui, repo, arg_1, arg_2=None, **opts):
+ """display the computation to get to a valid delta for storing REV
+
+ This command will replay the process used to find the "best" delta to store
+ a revision and display information about all the steps used to get to that
+ result.
+
+ The revision use the revision number of the target storage (not changelog
+ revision number).
+
+ note: the process is initiated from a full text of the revision to store.
+ """
+ opts = pycompat.byteskwargs(opts)
+ if arg_2 is None:
+ file_ = None
+ rev = arg_1
+ else:
+ file_ = arg_1
+ rev = arg_2
+
+ rev = int(rev)
+
+ revlog = cmdutil.openrevlog(repo, b'debugdeltachain', file_, opts)
+
+ deltacomputer = deltautil.deltacomputer(
+ revlog,
+ write_debug=ui.write,
+ debug_search=True,
+ )
+
+ node = revlog.node(rev)
+ p1r, p2r = revlog.parentrevs(rev)
+ p1 = revlog.node(p1r)
+ p2 = revlog.node(p2r)
+ btext = [revlog.revision(rev)]
+ textlen = len(btext[0])
+ cachedelta = None
+ flags = revlog.flags(rev)
+
+ revinfo = revlogutils.revisioninfo(
+ node,
+ p1,
+ p2,
+ btext,
+ textlen,
+ cachedelta,
+ flags,
+ )
+
+ fh = revlog._datafp()
+ deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
+
+
+@command(
b'debugdirstate|debugstate',
[
(
--- a/mercurial/revlogutils/deltas.py Thu May 19 23:39:42 2022 +0100
+++ b/mercurial/revlogutils/deltas.py Fri May 20 14:27:46 2022 +0200
@@ -931,9 +931,10 @@
class deltacomputer:
- def __init__(self, revlog, write_debug=None):
+ def __init__(self, revlog, write_debug=None, debug_search=False):
self.revlog = revlog
self._write_debug = write_debug
+ self._debug_search = debug_search
def buildtext(self, revinfo, fh):
"""Builds a fulltext version of a revision
@@ -980,6 +981,7 @@
def _builddeltainfo(self, revinfo, base, fh):
# can we use the cached delta?
revlog = self.revlog
+ debug_search = self._write_debug is not None and self._debug_search
chainbase = revlog.chainbase(base)
if revlog._generaldelta:
deltabase = base
@@ -1009,13 +1011,27 @@
delta = revinfo.cachedelta[1]
if delta is None:
delta = self._builddeltadiff(base, revinfo, fh)
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
+ msg %= len(delta)
+ self._write_debug(msg)
# snapshotdept need to be neither None nor 0 level snapshot
if revlog.upperboundcomp is not None and snapshotdepth:
lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
snapshotlimit = revinfo.textlen >> snapshotdepth
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
+ msg %= lowestrealisticdeltalen
+ self._write_debug(msg)
if snapshotlimit < lowestrealisticdeltalen:
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
+ self._write_debug(msg)
return None
if revlog.length(base) < lowestrealisticdeltalen:
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
+ self._write_debug(msg)
return None
header, data = revlog.compress(delta)
deltalen = len(header) + len(data)
@@ -1090,6 +1106,8 @@
if self._write_debug is not None:
start = util.timer()
+ debug_search = self._write_debug is not None and self._debug_search
+
# count the number of different delta we tried (for debug purpose)
dbg_try_count = 0
# count the number of "search round" we did. (for debug purpose)
@@ -1113,6 +1131,10 @@
p2_chain_len = revlog._chaininfo(p2r)[0]
else:
p2_chain_len = -1
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
+ msg %= target_rev
+ self._write_debug(msg)
groups = _candidategroups(
self.revlog, revinfo.textlen, p1r, p2r, cachedelta
@@ -1120,21 +1142,93 @@
candidaterevs = next(groups)
while candidaterevs is not None:
dbg_try_rounds += 1
+ if debug_search:
+ prev = None
+ if deltainfo is not None:
+ prev = deltainfo.base
+
+ if p1 in candidaterevs or p2 in candidaterevs:
+ round_type = b"parents"
+ elif prev is not None and all(c < prev for c in candidaterevs):
+ round_type = b"refine-down"
+ elif prev is not None and all(c > prev for c in candidaterevs):
+ round_type = b"refine-up"
+ else:
+ round_type = b"search-down"
+ msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
+ msg %= (dbg_try_rounds, len(candidaterevs), round_type)
+ self._write_debug(msg)
nominateddeltas = []
if deltainfo is not None:
+ if debug_search:
+ msg = (
+ b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
+ )
+ msg %= (deltainfo.base, deltainfo.deltalen)
+ self._write_debug(msg)
# if we already found a good delta,
# challenge it against refined candidates
nominateddeltas.append(deltainfo)
for candidaterev in candidaterevs:
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
+ msg %= candidaterev
+ self._write_debug(msg)
+ candidate_type = None
+ if candidaterev == p1:
+ candidate_type = b"p1"
+ elif candidaterev == p2:
+ candidate_type = b"p2"
+ elif self.revlog.issnapshot(candidaterev):
+ candidate_type = b"snapshot-%d"
+ candidate_type %= self.revlog.snapshotdepth(
+ candidaterev
+ )
+
+ if candidate_type is not None:
+ msg = b"DBG-DELTAS-SEARCH: type=%s\n"
+ msg %= candidate_type
+ self._write_debug(msg)
+ msg = b"DBG-DELTAS-SEARCH: size=%d\n"
+ msg %= self.revlog.length(candidaterev)
+ self._write_debug(msg)
+ msg = b"DBG-DELTAS-SEARCH: base=%d\n"
+ msg %= self.revlog.deltaparent(candidaterev)
+ self._write_debug(msg)
if candidaterev in excluded_bases:
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n"
+ self._write_debug(msg)
continue
if candidaterev >= target_rev:
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n"
+ self._write_debug(msg)
continue
dbg_try_count += 1
+
+ if debug_search:
+ delta_start = util.timer()
candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
+ if debug_search:
+ delta_end = util.timer()
+ msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
+ msg %= delta_end - delta_start
+ self._write_debug(msg)
if candidatedelta is not None:
if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
+ if debug_search:
+ msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
+ msg %= candidatedelta.deltalen
+ self._write_debug(msg)
nominateddeltas.append(candidatedelta)
+ elif debug_search:
+ msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
+ msg %= candidatedelta.deltalen
+ self._write_debug(msg)
+ elif debug_search:
+ msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
+ self._write_debug(msg)
if nominateddeltas:
deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
if deltainfo is not None:
@@ -1145,7 +1239,7 @@
if deltainfo is None:
dbg_type = b"full"
deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
- elif deltainfo.snapshotdepth:
+ elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
dbg_type = b"snapshot"
else:
dbg_type = b"delta"
@@ -1161,8 +1255,13 @@
'p1-chain-len': p1_chain_len,
'p2-chain-len': p2_chain_len,
}
- if deltainfo.snapshotdepth is not None:
- dbg['snapshot-depth'] = deltainfo.snapshotdepth
+ if (
+ deltainfo.snapshotdepth # pytype: disable=attribute-error
+ is not None
+ ):
+ dbg[
+ 'snapshot-depth'
+ ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
else:
dbg['snapshot-depth'] = 0
target_revlog = b"UNKNOWN"
--- a/tests/test-completion.t Thu May 19 23:39:42 2022 +0100
+++ b/tests/test-completion.t Fri May 20 14:27:46 2022 +0200
@@ -74,6 +74,7 @@
Show debug commands if there are no other candidates
$ hg debugcomplete debug
+ debug-delta-find
debug-repair-issue6528
debugancestor
debugantivirusrunning
@@ -267,6 +268,7 @@
config: untrusted, exp-all-known, edit, local, source, shared, non-shared, global, template
continue: dry-run
copy: forget, after, at-rev, force, include, exclude, dry-run
+ debug-delta-find: changelog, manifest, dir, template
debug-repair-issue6528: to-report, from-report, paranoid, dry-run
debugancestor:
debugantivirusrunning:
--- a/tests/test-help.t Thu May 19 23:39:42 2022 +0100
+++ b/tests/test-help.t Fri May 20 14:27:46 2022 +0200
@@ -978,6 +978,8 @@
$ hg help debug
debug commands (internal and unsupported):
+ debug-delta-find
+ display the computation to get to a valid delta for storing REV
debug-repair-issue6528
find affected revisions and repair them. See issue6528 for more
details.
--- a/tests/test-sparse-revlog.t Thu May 19 23:39:42 2022 +0100
+++ b/tests/test-sparse-revlog.t Fri May 20 14:27:46 2022 +0200
@@ -149,4 +149,46 @@
deltas against p2 : 63 ( 1.36%)
deltas against other : 0 ( 0.00%)
+
+Test `debug-delta-find`
+-----------------------
+
+ $ ls -1
+ SPARSE-REVLOG-TEST-FILE
+ $ hg debugdeltachain SPARSE-REVLOG-TEST-FILE | grep other | tail -1
+ 4971 3 5 4930 other 19179 346472 427596 1.23414 15994877 15567281 36.40652 427596 179288 1.00000 5
+ $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971
+ DBG-DELTAS-SEARCH: SEARCH rev=4971
+ DBG-DELTAS-SEARCH: ROUND #1 - 2 candidates - search-down
+ DBG-DELTAS-SEARCH: CANDIDATE: rev=4962
+ DBG-DELTAS-SEARCH: type=snapshot-4
+ DBG-DELTAS-SEARCH: size=18296
+ DBG-DELTAS-SEARCH: base=4930
+ DBG-DELTAS-SEARCH: uncompressed-delta-size=30377
+ DBG-DELTAS-SEARCH: delta-search-time=* (glob)
+ DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD)
+ DBG-DELTAS-SEARCH: CANDIDATE: rev=4971
+ DBG-DELTAS-SEARCH: type=snapshot-4
+ DBG-DELTAS-SEARCH: size=19179
+ DBG-DELTAS-SEARCH: base=4930
+ DBG-DELTAS-SEARCH: TOO-HIGH
+ DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down
+ DBG-DELTAS-SEARCH: CANDIDATE: rev=4930
+ DBG-DELTAS-SEARCH: type=snapshot-3
+ DBG-DELTAS-SEARCH: size=39228
+ DBG-DELTAS-SEARCH: base=4799
+ DBG-DELTAS-SEARCH: uncompressed-delta-size=33050
+ DBG-DELTAS-SEARCH: delta-search-time=* (glob)
+ DBG-DELTAS-SEARCH: DELTA: length=19179 (GOOD)
+ DBG-DELTAS-SEARCH: ROUND #3 - 1 candidates - refine-down
+ DBG-DELTAS-SEARCH: CONTENDER: rev=4930 - length=19179
+ DBG-DELTAS-SEARCH: CANDIDATE: rev=4799
+ DBG-DELTAS-SEARCH: type=snapshot-2
+ DBG-DELTAS-SEARCH: size=50213
+ DBG-DELTAS-SEARCH: base=4623
+ DBG-DELTAS-SEARCH: uncompressed-delta-size=82661
+ DBG-DELTAS-SEARCH: delta-search-time=* (glob)
+ DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD)
+ DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob)
+
$ cd ..