debug: add an option to display statistic about a unbundling operation
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 17 Oct 2022 22:19:43 +0200
changeset 49610 35d4c2124073
parent 49609 9cac281eb9c0
child 49611 2afee2176775
debug: add an option to display statistic about a unbundling operation This will helps a lot to understand how the bundling decision have actually impacted pull/unbundle on the other side.
mercurial/changegroup.py
mercurial/configitems.py
mercurial/filelog.py
mercurial/manifest.py
mercurial/revlog.py
mercurial/revlogutils/deltas.py
tests/test-bundle.t
--- a/mercurial/changegroup.py	Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/changegroup.py	Mon Oct 17 22:19:43 2022 +0200
@@ -105,6 +105,164 @@
                 os.unlink(cleanup)
 
 
+def _dbg_ubdl_line(
+    ui,
+    indent,
+    key,
+    base_value=None,
+    percentage_base=None,
+    percentage_key=None,
+):
+    """Print one line of debug_unbundle_debug_info"""
+    line = b"DEBUG-UNBUNDLING: "
+    line += b' ' * (2 * indent)
+    key += b":"
+    padding = b''
+    if base_value is not None:
+        assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
+        line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
+        if isinstance(base_value, float):
+            line += b"%14.3f seconds" % base_value
+        else:
+            line += b"%10d" % base_value
+            padding = b'            '
+    else:
+        line += key
+
+    if percentage_base is not None:
+        line += padding
+        padding = b''
+        assert base_value is not None
+        percentage = base_value * 100 // percentage_base
+        if percentage_key is not None:
+            line += b" (%3d%% of %s)" % (
+                percentage,
+                percentage_key,
+            )
+        else:
+            line += b" (%3d%%)" % percentage
+
+    line += b'\n'
+    ui.write_err(line)
+
+
+def _sumf(items):
+    # python < 3.8 does not support a `start=0.0` argument to sum
+    # So we have to cheat a bit until we drop support for those version
+    if not items:
+        return 0.0
+    return sum(items)
+
+
+def display_unbundle_debug_info(ui, debug_info):
+    """display an unbundling report from debug information"""
+    cl_info = []
+    mn_info = []
+    fl_info = []
+    _dispatch = [
+        (b'CHANGELOG:', cl_info),
+        (b'MANIFESTLOG:', mn_info),
+        (b'FILELOG:', fl_info),
+    ]
+    for e in debug_info:
+        for prefix, info in _dispatch:
+            if e["target-revlog"].startswith(prefix):
+                info.append(e)
+                break
+        else:
+            assert False, 'unreachable'
+    each_info = [
+        (b'changelog', cl_info),
+        (b'manifests', mn_info),
+        (b'files', fl_info),
+    ]
+
+    # General Revision Countss
+    _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info))
+    for key, info in each_info:
+        if not info:
+            continue
+        _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info))
+
+    # General Time spent
+    all_durations = [e['duration'] for e in debug_info]
+    all_durations.sort()
+    total_duration = _sumf(all_durations)
+    _dbg_ubdl_line(ui, 0, b'total-time', total_duration)
+
+    for key, info in each_info:
+        if not info:
+            continue
+        durations = [e['duration'] for e in info]
+        durations.sort()
+        _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration)
+
+    # Count and cache reuse per delta types
+    each_types = {}
+    for key, info in each_info:
+        each_types[key] = types = {
+            b'full': 0,
+            b'full-cached': 0,
+            b'snapshot': 0,
+            b'snapshot-cached': 0,
+            b'delta': 0,
+            b'delta-cached': 0,
+            b'unknown': 0,
+            b'unknown-cached': 0,
+        }
+        for e in info:
+            types[e['type']] += 1
+            if e['using-cached-base']:
+                types[e['type'] + b'-cached'] += 1
+
+    EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown')
+    if debug_info:
+        _dbg_ubdl_line(ui, 0, b'type-count')
+    for key, info in each_info:
+        if info:
+            _dbg_ubdl_line(ui, 1, key)
+        t = each_types[key]
+        for tn in EXPECTED_TYPES:
+            if t[tn]:
+                tc = tn + b'-cached'
+                _dbg_ubdl_line(ui, 2, tn, t[tn])
+                _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn])
+
+    # time perf delta types and reuse
+    each_type_time = {}
+    for key, info in each_info:
+        each_type_time[key] = t = {
+            b'full': [],
+            b'full-cached': [],
+            b'snapshot': [],
+            b'snapshot-cached': [],
+            b'delta': [],
+            b'delta-cached': [],
+            b'unknown': [],
+            b'unknown-cached': [],
+        }
+        for e in info:
+            t[e['type']].append(e['duration'])
+            if e['using-cached-base']:
+                t[e['type'] + b'-cached'].append(e['duration'])
+        for t_key, value in list(t.items()):
+            value.sort()
+            t[t_key] = _sumf(value)
+
+    if debug_info:
+        _dbg_ubdl_line(ui, 0, b'type-time')
+    for key, info in each_info:
+        if info:
+            _dbg_ubdl_line(ui, 1, key)
+        t = each_type_time[key]
+        td = total_duration  # to same space on next lines
+        for tn in EXPECTED_TYPES:
+            if t[tn]:
+                tc = tn + b'-cached'
+                _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total")
+                _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total")
+
+
 class cg1unpacker:
     """Unpacker for cg1 changegroup streams.
 
@@ -254,7 +412,15 @@
                     pos = next
             yield closechunk()
 
-    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
+    def _unpackmanifests(
+        self,
+        repo,
+        revmap,
+        trp,
+        prog,
+        addrevisioncb=None,
+        debug_info=None,
+    ):
         self.callback = prog.increment
         # no need to check for empty manifest group here:
         # if the result of the merge of 1 and 2 is the same in 3 and 4,
@@ -263,7 +429,13 @@
         self.manifestheader()
         deltas = self.deltaiter()
         storage = repo.manifestlog.getstorage(b'')
-        storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
+        storage.addgroup(
+            deltas,
+            revmap,
+            trp,
+            addrevisioncb=addrevisioncb,
+            debug_info=debug_info,
+        )
         prog.complete()
         self.callback = None
 
@@ -292,6 +464,10 @@
         """
         repo = repo.unfiltered()
 
+        debug_info = None
+        if repo.ui.configbool(b'debug', b'unbundling-stats'):
+            debug_info = []
+
         # Only useful if we're adding sidedata categories. If both peers have
         # the same categories, then we simply don't do anything.
         adding_sidedata = (
@@ -366,6 +542,7 @@
                 alwayscache=True,
                 addrevisioncb=onchangelog,
                 duplicaterevisioncb=ondupchangelog,
+                debug_info=debug_info,
             ):
                 repo.ui.develwarn(
                     b'applied empty changelog from changegroup',
@@ -413,6 +590,7 @@
                 trp,
                 progress,
                 addrevisioncb=on_manifest_rev,
+                debug_info=debug_info,
             )
 
             needfiles = {}
@@ -449,6 +627,7 @@
                 efiles,
                 needfiles,
                 addrevisioncb=on_filelog_rev,
+                debug_info=debug_info,
             )
 
             if sidedata_helpers:
@@ -567,6 +746,8 @@
                     b'changegroup-runhooks-%020i' % clstart,
                     lambda tr: repo._afterlock(runhooks),
                 )
+            if debug_info is not None:
+                display_unbundle_debug_info(repo.ui, debug_info)
         finally:
             repo.ui.flush()
         # never return 0 here:
@@ -626,9 +807,22 @@
         protocol_flags = 0
         return node, p1, p2, deltabase, cs, flags, protocol_flags
 
-    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
+    def _unpackmanifests(
+        self,
+        repo,
+        revmap,
+        trp,
+        prog,
+        addrevisioncb=None,
+        debug_info=None,
+    ):
         super(cg3unpacker, self)._unpackmanifests(
-            repo, revmap, trp, prog, addrevisioncb=addrevisioncb
+            repo,
+            revmap,
+            trp,
+            prog,
+            addrevisioncb=addrevisioncb,
+            debug_info=debug_info,
         )
         for chunkdata in iter(self.filelogheader, {}):
             # If we get here, there are directory manifests in the changegroup
@@ -636,7 +830,11 @@
             repo.ui.debug(b"adding %s revisions\n" % d)
             deltas = self.deltaiter()
             if not repo.manifestlog.getstorage(d).addgroup(
-                deltas, revmap, trp, addrevisioncb=addrevisioncb
+                deltas,
+                revmap,
+                trp,
+                addrevisioncb=addrevisioncb,
+                debug_info=debug_info,
             ):
                 raise error.Abort(_(b"received dir revlog group is empty"))
 
@@ -2173,6 +2371,7 @@
     expectedfiles,
     needfiles,
     addrevisioncb=None,
+    debug_info=None,
 ):
     revisions = 0
     files = 0
@@ -2193,6 +2392,7 @@
                 revmap,
                 trp,
                 addrevisioncb=addrevisioncb,
+                debug_info=debug_info,
             )
             if not added:
                 raise error.Abort(_(b"received file revlog group is empty"))
--- a/mercurial/configitems.py	Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/configitems.py	Mon Oct 17 22:19:43 2022 +0200
@@ -594,6 +594,12 @@
     b'bundling-stats',
     default=False,
 )
+# display extra information about the unbundling process
+coreconfigitem(
+    b'debug',
+    b'unbundling-stats',
+    default=False,
+)
 coreconfigitem(
     b'defaults',
     b'.*',
--- a/mercurial/filelog.py	Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/filelog.py	Mon Oct 17 22:19:43 2022 +0200
@@ -153,6 +153,7 @@
         addrevisioncb=None,
         duplicaterevisioncb=None,
         maybemissingparents=False,
+        debug_info=None,
     ):
         if maybemissingparents:
             raise error.Abort(
@@ -173,6 +174,7 @@
                 transaction,
                 addrevisioncb=addrevisioncb,
                 duplicaterevisioncb=duplicaterevisioncb,
+                debug_info=debug_info,
             )
 
     def getstrippoint(self, minlink):
--- a/mercurial/manifest.py	Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/manifest.py	Mon Oct 17 22:19:43 2022 +0200
@@ -1856,6 +1856,7 @@
         alwayscache=False,
         addrevisioncb=None,
         duplicaterevisioncb=None,
+        debug_info=None,
     ):
         return self._revlog.addgroup(
             deltas,
@@ -1864,6 +1865,7 @@
             alwayscache=alwayscache,
             addrevisioncb=addrevisioncb,
             duplicaterevisioncb=duplicaterevisioncb,
+            debug_info=debug_info,
         )
 
     def rawsize(self, rev):
--- a/mercurial/revlog.py	Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/revlog.py	Mon Oct 17 22:19:43 2022 +0200
@@ -2640,6 +2640,7 @@
         alwayscache=False,
         addrevisioncb=None,
         duplicaterevisioncb=None,
+        debug_info=None,
     ):
         """
         add a delta group
@@ -2665,6 +2666,7 @@
                 deltacomputer = deltautil.deltacomputer(
                     self,
                     write_debug=write_debug,
+                    debug_info=debug_info,
                 )
                 # loop through our set of deltas
                 for data in deltas:
--- a/mercurial/revlogutils/deltas.py	Tue Nov 15 16:25:23 2022 +0100
+++ b/mercurial/revlogutils/deltas.py	Mon Oct 17 22:19:43 2022 +0200
@@ -674,7 +674,12 @@
     deltas_limit = textlen * LIMIT_DELTA2TEXT
 
     tested = {nullrev}
-    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
+    candidates = _refinedgroups(
+        revlog,
+        p1,
+        p2,
+        cachedelta,
+    )
     while True:
         temptative = candidates.send(good)
         if temptative is None:
@@ -703,6 +708,7 @@
             # no delta for rawtext-changing revs (see "candelta" for why)
             if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                 continue
+
             # If we reach here, we are about to build and test a delta.
             # The delta building process will compute the chaininfo in all
             # case, since that computation is cached, it is fine to access it
@@ -768,15 +774,28 @@
     # This logic only applies to general delta repositories and can be disabled
     # through configuration. Disabling reuse source delta is useful when
     # we want to make sure we recomputed "optimal" deltas.
+    debug_info = None
     if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
         # Assume what we received from the server is a good choice
         # build delta will reuse the cache
+        if debug_info is not None:
+            debug_info['cached-delta.tested'] += 1
         good = yield (cachedelta[0],)
         if good is not None:
+            if debug_info is not None:
+                debug_info['cached-delta.accepted'] += 1
             yield None
             return
+    # XXX cache me higher
     snapshots = collections.defaultdict(list)
-    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
+    groups = _rawgroups(
+        revlog,
+        p1,
+        p2,
+        cachedelta,
+        snapshots,
+    )
+    for candidates in groups:
         good = yield candidates
         if good is not None:
             break
@@ -805,7 +824,10 @@
             children = tuple(sorted(c for c in snapshots[good]))
             good = yield children
 
-    # we have found nothing
+    if debug_info is not None:
+        if good is None:
+            debug_info['no-solution'] += 1
+
     yield None
 
 
@@ -931,10 +953,17 @@
 
 
 class deltacomputer:
-    def __init__(self, revlog, write_debug=None, debug_search=False):
+    def __init__(
+        self,
+        revlog,
+        write_debug=None,
+        debug_search=False,
+        debug_info=None,
+    ):
         self.revlog = revlog
         self._write_debug = write_debug
         self._debug_search = debug_search
+        self._debug_info = debug_info
 
     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision
@@ -1103,11 +1132,14 @@
         if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
             return self._fullsnapshotinfo(fh, revinfo, target_rev)
 
-        if self._write_debug is not None:
+        gather_debug = (
+            self._write_debug is not None or self._debug_info is not None
+        )
+        debug_search = self._write_debug is not None and self._debug_search
+
+        if gather_debug:
             start = util.timer()
 
-        debug_search = self._write_debug is not None and self._debug_search
-
         # count the number of different delta we tried (for debug purpose)
         dbg_try_count = 0
         # count the number of "search round" we did. (for debug purpose)
@@ -1122,7 +1154,7 @@
         deltainfo = None
         p1r, p2r = revlog.rev(p1), revlog.rev(p2)
 
-        if self._write_debug is not None:
+        if gather_debug:
             if p1r != nullrev:
                 p1_chain_len = revlog._chaininfo(p1r)[0]
             else:
@@ -1250,9 +1282,8 @@
         else:
             dbg_type = b"delta"
 
-        if self._write_debug is not None:
+        if gather_debug:
             end = util.timer()
-            assert deltainfo is not None  # please pytype
             used_cached = (
                 cachedelta is not None
                 and dbg_try_rounds == 1
@@ -1262,7 +1293,7 @@
             dbg = {
                 'duration': end - start,
                 'revision': target_rev,
-                'delta-base': deltainfo.base,
+                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                 'search_round_count': dbg_try_rounds,
                 'using-cached-base': used_cached,
                 'delta_try_count': dbg_try_count,
@@ -1294,35 +1325,39 @@
                     target_revlog += b'%s:' % target_key
             dbg['target-revlog'] = target_revlog
 
-            msg = (
-                b"DBG-DELTAS:"
-                b" %-12s"
-                b" rev=%d:"
-                b" delta-base=%d"
-                b" is-cached=%d"
-                b" - search-rounds=%d"
-                b" try-count=%d"
-                b" - delta-type=%-6s"
-                b" snap-depth=%d"
-                b" - p1-chain-length=%d"
-                b" p2-chain-length=%d"
-                b" - duration=%f"
-                b"\n"
-            )
-            msg %= (
-                dbg["target-revlog"],
-                dbg["revision"],
-                dbg["delta-base"],
-                dbg["using-cached-base"],
-                dbg["search_round_count"],
-                dbg["delta_try_count"],
-                dbg["type"],
-                dbg["snapshot-depth"],
-                dbg["p1-chain-len"],
-                dbg["p2-chain-len"],
-                dbg["duration"],
-            )
-            self._write_debug(msg)
+            if self._debug_info is not None:
+                self._debug_info.append(dbg)
+
+            if self._write_debug is not None:
+                msg = (
+                    b"DBG-DELTAS:"
+                    b" %-12s"
+                    b" rev=%d:"
+                    b" delta-base=%d"
+                    b" is-cached=%d"
+                    b" - search-rounds=%d"
+                    b" try-count=%d"
+                    b" - delta-type=%-6s"
+                    b" snap-depth=%d"
+                    b" - p1-chain-length=%d"
+                    b" p2-chain-length=%d"
+                    b" - duration=%f"
+                    b"\n"
+                )
+                msg %= (
+                    dbg["target-revlog"],
+                    dbg["revision"],
+                    dbg["delta-base"],
+                    dbg["using-cached-base"],
+                    dbg["search_round_count"],
+                    dbg["delta_try_count"],
+                    dbg["type"],
+                    dbg["snapshot-depth"],
+                    dbg["p1-chain-len"],
+                    dbg["p2-chain-len"],
+                    dbg["duration"],
+                )
+                self._write_debug(msg)
         return deltainfo
 
 
--- a/tests/test-bundle.t	Tue Nov 15 16:25:23 2022 +0100
+++ b/tests/test-bundle.t	Mon Oct 17 22:19:43 2022 +0200
@@ -1081,3 +1081,47 @@
   new changesets 4fe08cd4693e:4652c276ac4f (3 drafts)
   (run 'hg update' to get a working copy)
 
+
+Test the debug statistic when applying a bundle
+-----------------------------------------------
+
+  $ hg init bar
+  $ hg -R bar unbundle ./default.hg  --config debug.unbundling-stats=yes
+  adding changesets
+  adding manifests
+  adding file changes
+  DEBUG-UNBUNDLING: revisions:                9
+  DEBUG-UNBUNDLING:   changelog:              3             ( 33%)
+  DEBUG-UNBUNDLING:   manifests:              3             ( 33%)
+  DEBUG-UNBUNDLING:   files:                  3             ( 33%)
+  DEBUG-UNBUNDLING: total-time:      ?????????????? seconds (glob)
+  DEBUG-UNBUNDLING:   changelog:     ?????????????? seconds (???%) (glob)
+  DEBUG-UNBUNDLING:   manifests:     ?????????????? seconds (???%) (glob)
+  DEBUG-UNBUNDLING:   files:         ?????????????? seconds (???%) (glob)
+  DEBUG-UNBUNDLING: type-count:
+  DEBUG-UNBUNDLING:   changelog:
+  DEBUG-UNBUNDLING:     full:                 3
+  DEBUG-UNBUNDLING:       cached:             0             (  0%)
+  DEBUG-UNBUNDLING:   manifests:
+  DEBUG-UNBUNDLING:     full:                 1
+  DEBUG-UNBUNDLING:       cached:             0             (  0%)
+  DEBUG-UNBUNDLING:     delta:                2
+  DEBUG-UNBUNDLING:       cached:             2             (100%)
+  DEBUG-UNBUNDLING:   files:
+  DEBUG-UNBUNDLING:     full:                 3
+  DEBUG-UNBUNDLING:       cached:             0             (  0%)
+  DEBUG-UNBUNDLING: type-time:
+  DEBUG-UNBUNDLING:   changelog:
+  DEBUG-UNBUNDLING:     full:        ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:       cached:    ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:   manifests:
+  DEBUG-UNBUNDLING:     full:        ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:       cached:    ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:     delta:       ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:       cached:    ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:   files:
+  DEBUG-UNBUNDLING:     full:        ?????????????? seconds (???% of total) (glob)
+  DEBUG-UNBUNDLING:       cached:    ?????????????? seconds (???% of total) (glob)
+  added 3 changesets with 3 changes to 3 files
+  new changesets 4fe08cd4693e:4652c276ac4f (3 drafts)
+  (run 'hg update' to get a working copy)