debug: add an option to display statistics about an unbundling operation
This will help a lot to understand how the bundling decisions have actually
impacted pull/unbundle on the other side.
--- a/mercurial/changegroup.py Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/changegroup.py Mon Oct 17 22:19:43 2022 +0200
@@ -105,6 +105,164 @@
os.unlink(cleanup)
+def _dbg_ubdl_line(
+ ui,
+ indent,
+ key,
+ base_value=None,
+ percentage_base=None,
+ percentage_key=None,
+):
+ """Print one line of display_unbundle_debug_info output"""
+ line = b"DEBUG-UNBUNDLING: "
+ line += b' ' * (2 * indent)
+ key += b":"
+ padding = b''
+ if base_value is not None:
+ assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
+ line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
+ if isinstance(base_value, float):
+ line += b"%14.3f seconds" % base_value
+ else:
+ line += b"%10d" % base_value
+ padding = b' '
+ else:
+ line += key
+
+ if percentage_base is not None:
+ line += padding
+ padding = b''
+ assert base_value is not None
+ percentage = base_value * 100 // percentage_base
+ if percentage_key is not None:
+ line += b" (%3d%% of %s)" % (
+ percentage,
+ percentage_key,
+ )
+ else:
+ line += b" (%3d%%)" % percentage
+
+ line += b'\n'
+ ui.write_err(line)
+
+
+def _sumf(items):
+ # python < 3.8 does not support a `start=0.0` argument to sum
+ # So we have to cheat a bit until we drop support for those versions
+ if not items:
+ return 0.0
+ return sum(items)
+
+
+def display_unbundle_debug_info(ui, debug_info):
+ """display an unbundling report from debug information"""
+ cl_info = []
+ mn_info = []
+ fl_info = []
+ _dispatch = [
+ (b'CHANGELOG:', cl_info),
+ (b'MANIFESTLOG:', mn_info),
+ (b'FILELOG:', fl_info),
+ ]
+ for e in debug_info:
+ for prefix, info in _dispatch:
+ if e["target-revlog"].startswith(prefix):
+ info.append(e)
+ break
+ else:
+ assert False, 'unreachable'
+ each_info = [
+ (b'changelog', cl_info),
+ (b'manifests', mn_info),
+ (b'files', fl_info),
+ ]
+
+ # General Revision Counts
+ _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info))
+ for key, info in each_info:
+ if not info:
+ continue
+ _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info))
+
+ # General Time spent
+ all_durations = [e['duration'] for e in debug_info]
+ all_durations.sort()
+ total_duration = _sumf(all_durations)
+ _dbg_ubdl_line(ui, 0, b'total-time', total_duration)
+
+ for key, info in each_info:
+ if not info:
+ continue
+ durations = [e['duration'] for e in info]
+ durations.sort()
+ _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration)
+
+ # Count and cache reuse per delta types
+ each_types = {}
+ for key, info in each_info:
+ each_types[key] = types = {
+ b'full': 0,
+ b'full-cached': 0,
+ b'snapshot': 0,
+ b'snapshot-cached': 0,
+ b'delta': 0,
+ b'delta-cached': 0,
+ b'unknown': 0,
+ b'unknown-cached': 0,
+ }
+ for e in info:
+ types[e['type']] += 1
+ if e['using-cached-base']:
+ types[e['type'] + b'-cached'] += 1
+
+ EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown')
+ if debug_info:
+ _dbg_ubdl_line(ui, 0, b'type-count')
+ for key, info in each_info:
+ if info:
+ _dbg_ubdl_line(ui, 1, key)
+ t = each_types[key]
+ for tn in EXPECTED_TYPES:
+ if t[tn]:
+ tc = tn + b'-cached'
+ _dbg_ubdl_line(ui, 2, tn, t[tn])
+ _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn])
+
+ # time perf delta types and reuse
+ each_type_time = {}
+ for key, info in each_info:
+ each_type_time[key] = t = {
+ b'full': [],
+ b'full-cached': [],
+ b'snapshot': [],
+ b'snapshot-cached': [],
+ b'delta': [],
+ b'delta-cached': [],
+ b'unknown': [],
+ b'unknown-cached': [],
+ }
+ for e in info:
+ t[e['type']].append(e['duration'])
+ if e['using-cached-base']:
+ t[e['type'] + b'-cached'].append(e['duration'])
+ for t_key, value in list(t.items()):
+ value.sort()
+ t[t_key] = _sumf(value)
+
+ if debug_info:
+ _dbg_ubdl_line(ui, 0, b'type-time')
+ for key, info in each_info:
+ if info:
+ _dbg_ubdl_line(ui, 1, key)
+ t = each_type_time[key]
+ td = total_duration # to save space on next lines
+ for tn in EXPECTED_TYPES:
+ if t[tn]:
+ tc = tn + b'-cached'
+ _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total")
+ _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total")
+
+
class cg1unpacker:
"""Unpacker for cg1 changegroup streams.
@@ -254,7 +412,15 @@
pos = next
yield closechunk()
- def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
+ def _unpackmanifests(
+ self,
+ repo,
+ revmap,
+ trp,
+ prog,
+ addrevisioncb=None,
+ debug_info=None,
+ ):
self.callback = prog.increment
# no need to check for empty manifest group here:
# if the result of the merge of 1 and 2 is the same in 3 and 4,
@@ -263,7 +429,13 @@
self.manifestheader()
deltas = self.deltaiter()
storage = repo.manifestlog.getstorage(b'')
- storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
+ storage.addgroup(
+ deltas,
+ revmap,
+ trp,
+ addrevisioncb=addrevisioncb,
+ debug_info=debug_info,
+ )
prog.complete()
self.callback = None
@@ -292,6 +464,10 @@
"""
repo = repo.unfiltered()
+ debug_info = None
+ if repo.ui.configbool(b'debug', b'unbundling-stats'):
+ debug_info = []
+
# Only useful if we're adding sidedata categories. If both peers have
# the same categories, then we simply don't do anything.
adding_sidedata = (
@@ -366,6 +542,7 @@
alwayscache=True,
addrevisioncb=onchangelog,
duplicaterevisioncb=ondupchangelog,
+ debug_info=debug_info,
):
repo.ui.develwarn(
b'applied empty changelog from changegroup',
@@ -413,6 +590,7 @@
trp,
progress,
addrevisioncb=on_manifest_rev,
+ debug_info=debug_info,
)
needfiles = {}
@@ -449,6 +627,7 @@
efiles,
needfiles,
addrevisioncb=on_filelog_rev,
+ debug_info=debug_info,
)
if sidedata_helpers:
@@ -567,6 +746,8 @@
b'changegroup-runhooks-%020i' % clstart,
lambda tr: repo._afterlock(runhooks),
)
+ if debug_info is not None:
+ display_unbundle_debug_info(repo.ui, debug_info)
finally:
repo.ui.flush()
# never return 0 here:
@@ -626,9 +807,22 @@
protocol_flags = 0
return node, p1, p2, deltabase, cs, flags, protocol_flags
- def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
+ def _unpackmanifests(
+ self,
+ repo,
+ revmap,
+ trp,
+ prog,
+ addrevisioncb=None,
+ debug_info=None,
+ ):
super(cg3unpacker, self)._unpackmanifests(
- repo, revmap, trp, prog, addrevisioncb=addrevisioncb
+ repo,
+ revmap,
+ trp,
+ prog,
+ addrevisioncb=addrevisioncb,
+ debug_info=debug_info,
)
for chunkdata in iter(self.filelogheader, {}):
# If we get here, there are directory manifests in the changegroup
@@ -636,7 +830,11 @@
repo.ui.debug(b"adding %s revisions\n" % d)
deltas = self.deltaiter()
if not repo.manifestlog.getstorage(d).addgroup(
- deltas, revmap, trp, addrevisioncb=addrevisioncb
+ deltas,
+ revmap,
+ trp,
+ addrevisioncb=addrevisioncb,
+ debug_info=debug_info,
):
raise error.Abort(_(b"received dir revlog group is empty"))
@@ -2173,6 +2371,7 @@
expectedfiles,
needfiles,
addrevisioncb=None,
+ debug_info=None,
):
revisions = 0
files = 0
@@ -2193,6 +2392,7 @@
revmap,
trp,
addrevisioncb=addrevisioncb,
+ debug_info=debug_info,
)
if not added:
raise error.Abort(_(b"received file revlog group is empty"))
--- a/mercurial/configitems.py Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/configitems.py Mon Oct 17 22:19:43 2022 +0200
@@ -594,6 +594,12 @@
b'bundling-stats',
default=False,
)
+# display extra information about the unbundling process
+coreconfigitem(
+ b'debug',
+ b'unbundling-stats',
+ default=False,
+)
coreconfigitem(
b'defaults',
b'.*',
--- a/mercurial/filelog.py Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/filelog.py Mon Oct 17 22:19:43 2022 +0200
@@ -153,6 +153,7 @@
addrevisioncb=None,
duplicaterevisioncb=None,
maybemissingparents=False,
+ debug_info=None,
):
if maybemissingparents:
raise error.Abort(
@@ -173,6 +174,7 @@
transaction,
addrevisioncb=addrevisioncb,
duplicaterevisioncb=duplicaterevisioncb,
+ debug_info=debug_info,
)
def getstrippoint(self, minlink):
--- a/mercurial/manifest.py Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/manifest.py Mon Oct 17 22:19:43 2022 +0200
@@ -1856,6 +1856,7 @@
alwayscache=False,
addrevisioncb=None,
duplicaterevisioncb=None,
+ debug_info=None,
):
return self._revlog.addgroup(
deltas,
@@ -1864,6 +1865,7 @@
alwayscache=alwayscache,
addrevisioncb=addrevisioncb,
duplicaterevisioncb=duplicaterevisioncb,
+ debug_info=debug_info,
)
def rawsize(self, rev):
--- a/mercurial/revlog.py Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/revlog.py Mon Oct 17 22:19:43 2022 +0200
@@ -2640,6 +2640,7 @@
alwayscache=False,
addrevisioncb=None,
duplicaterevisioncb=None,
+ debug_info=None,
):
"""
add a delta group
@@ -2665,6 +2666,7 @@
deltacomputer = deltautil.deltacomputer(
self,
write_debug=write_debug,
+ debug_info=debug_info,
)
# loop through our set of deltas
for data in deltas:
--- a/mercurial/revlogutils/deltas.py Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/revlogutils/deltas.py Mon Oct 17 22:19:43 2022 +0200
@@ -674,7 +674,12 @@
deltas_limit = textlen * LIMIT_DELTA2TEXT
tested = {nullrev}
- candidates = _refinedgroups(revlog, p1, p2, cachedelta)
+ candidates = _refinedgroups(
+ revlog,
+ p1,
+ p2,
+ cachedelta,
+ )
while True:
temptative = candidates.send(good)
if temptative is None:
@@ -703,6 +708,7 @@
# no delta for rawtext-changing revs (see "candelta" for why)
if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
continue
+
# If we reach here, we are about to build and test a delta.
# The delta building process will compute the chaininfo in all
# case, since that computation is cached, it is fine to access it
@@ -768,15 +774,28 @@
# This logic only applies to general delta repositories and can be disabled
# through configuration. Disabling reuse source delta is useful when
# we want to make sure we recomputed "optimal" deltas.
+ debug_info = None
if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
# Assume what we received from the server is a good choice
# build delta will reuse the cache
+ if debug_info is not None:
+ debug_info['cached-delta.tested'] += 1
good = yield (cachedelta[0],)
if good is not None:
+ if debug_info is not None:
+ debug_info['cached-delta.accepted'] += 1
yield None
return
+ # XXX cache me higher
snapshots = collections.defaultdict(list)
- for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
+ groups = _rawgroups(
+ revlog,
+ p1,
+ p2,
+ cachedelta,
+ snapshots,
+ )
+ for candidates in groups:
good = yield candidates
if good is not None:
break
@@ -805,7 +824,10 @@
children = tuple(sorted(c for c in snapshots[good]))
good = yield children
- # we have found nothing
+ if debug_info is not None:
+ if good is None:
+ debug_info['no-solution'] += 1
+
yield None
@@ -931,10 +953,17 @@
class deltacomputer:
- def __init__(self, revlog, write_debug=None, debug_search=False):
+ def __init__(
+ self,
+ revlog,
+ write_debug=None,
+ debug_search=False,
+ debug_info=None,
+ ):
self.revlog = revlog
self._write_debug = write_debug
self._debug_search = debug_search
+ self._debug_info = debug_info
def buildtext(self, revinfo, fh):
"""Builds a fulltext version of a revision
@@ -1103,11 +1132,14 @@
if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
return self._fullsnapshotinfo(fh, revinfo, target_rev)
- if self._write_debug is not None:
+ gather_debug = (
+ self._write_debug is not None or self._debug_info is not None
+ )
+ debug_search = self._write_debug is not None and self._debug_search
+
+ if gather_debug:
start = util.timer()
- debug_search = self._write_debug is not None and self._debug_search
-
# count the number of different delta we tried (for debug purpose)
dbg_try_count = 0
# count the number of "search round" we did. (for debug purpose)
@@ -1122,7 +1154,7 @@
deltainfo = None
p1r, p2r = revlog.rev(p1), revlog.rev(p2)
- if self._write_debug is not None:
+ if gather_debug:
if p1r != nullrev:
p1_chain_len = revlog._chaininfo(p1r)[0]
else:
@@ -1250,9 +1282,8 @@
else:
dbg_type = b"delta"
- if self._write_debug is not None:
+ if gather_debug:
end = util.timer()
- assert deltainfo is not None # please pytype
used_cached = (
cachedelta is not None
and dbg_try_rounds == 1
@@ -1262,7 +1293,7 @@
dbg = {
'duration': end - start,
'revision': target_rev,
- 'delta-base': deltainfo.base,
+ 'delta-base': deltainfo.base, # pytype: disable=attribute-error
'search_round_count': dbg_try_rounds,
'using-cached-base': used_cached,
'delta_try_count': dbg_try_count,
@@ -1294,35 +1325,39 @@
target_revlog += b'%s:' % target_key
dbg['target-revlog'] = target_revlog
- msg = (
- b"DBG-DELTAS:"
- b" %-12s"
- b" rev=%d:"
- b" delta-base=%d"
- b" is-cached=%d"
- b" - search-rounds=%d"
- b" try-count=%d"
- b" - delta-type=%-6s"
- b" snap-depth=%d"
- b" - p1-chain-length=%d"
- b" p2-chain-length=%d"
- b" - duration=%f"
- b"\n"
- )
- msg %= (
- dbg["target-revlog"],
- dbg["revision"],
- dbg["delta-base"],
- dbg["using-cached-base"],
- dbg["search_round_count"],
- dbg["delta_try_count"],
- dbg["type"],
- dbg["snapshot-depth"],
- dbg["p1-chain-len"],
- dbg["p2-chain-len"],
- dbg["duration"],
- )
- self._write_debug(msg)
+ if self._debug_info is not None:
+ self._debug_info.append(dbg)
+
+ if self._write_debug is not None:
+ msg = (
+ b"DBG-DELTAS:"
+ b" %-12s"
+ b" rev=%d:"
+ b" delta-base=%d"
+ b" is-cached=%d"
+ b" - search-rounds=%d"
+ b" try-count=%d"
+ b" - delta-type=%-6s"
+ b" snap-depth=%d"
+ b" - p1-chain-length=%d"
+ b" p2-chain-length=%d"
+ b" - duration=%f"
+ b"\n"
+ )
+ msg %= (
+ dbg["target-revlog"],
+ dbg["revision"],
+ dbg["delta-base"],
+ dbg["using-cached-base"],
+ dbg["search_round_count"],
+ dbg["delta_try_count"],
+ dbg["type"],
+ dbg["snapshot-depth"],
+ dbg["p1-chain-len"],
+ dbg["p2-chain-len"],
+ dbg["duration"],
+ )
+ self._write_debug(msg)
return deltainfo
--- a/tests/test-bundle.t Tue Nov 15 16:25:23 2022 +0100
+++ b/tests/test-bundle.t Mon Oct 17 22:19:43 2022 +0200
@@ -1081,3 +1081,47 @@
new changesets 4fe08cd4693e:4652c276ac4f (3 drafts)
(run 'hg update' to get a working copy)
+
+Test the debug statistic when applying a bundle
+-----------------------------------------------
+
+ $ hg init bar
+ $ hg -R bar unbundle ./default.hg --config debug.unbundling-stats=yes
+ adding changesets
+ adding manifests
+ adding file changes
+ DEBUG-UNBUNDLING: revisions: 9
+ DEBUG-UNBUNDLING: changelog: 3 ( 33%)
+ DEBUG-UNBUNDLING: manifests: 3 ( 33%)
+ DEBUG-UNBUNDLING: files: 3 ( 33%)
+ DEBUG-UNBUNDLING: total-time: ?????????????? seconds (glob)
+ DEBUG-UNBUNDLING: changelog: ?????????????? seconds (???%) (glob)
+ DEBUG-UNBUNDLING: manifests: ?????????????? seconds (???%) (glob)
+ DEBUG-UNBUNDLING: files: ?????????????? seconds (???%) (glob)
+ DEBUG-UNBUNDLING: type-count:
+ DEBUG-UNBUNDLING: changelog:
+ DEBUG-UNBUNDLING: full: 3
+ DEBUG-UNBUNDLING: cached: 0 ( 0%)
+ DEBUG-UNBUNDLING: manifests:
+ DEBUG-UNBUNDLING: full: 1
+ DEBUG-UNBUNDLING: cached: 0 ( 0%)
+ DEBUG-UNBUNDLING: delta: 2
+ DEBUG-UNBUNDLING: cached: 2 (100%)
+ DEBUG-UNBUNDLING: files:
+ DEBUG-UNBUNDLING: full: 3
+ DEBUG-UNBUNDLING: cached: 0 ( 0%)
+ DEBUG-UNBUNDLING: type-time:
+ DEBUG-UNBUNDLING: changelog:
+ DEBUG-UNBUNDLING: full: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: manifests:
+ DEBUG-UNBUNDLING: full: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: delta: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: files:
+ DEBUG-UNBUNDLING: full: ?????????????? seconds (???% of total) (glob)
+ DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob)
+ added 3 changesets with 3 changes to 3 files
+ new changesets 4fe08cd4693e:4652c276ac4f (3 drafts)
+ (run 'hg update' to get a working copy)