debug: add an option to display statistic about a bundling operation
authorPierre-Yves David <pierre-yves.david@octobus.net>
Tue, 15 Nov 2022 16:25:23 +0100
changeset 49609 9cac281eb9c0
parent 49608 78ba41878f2e
child 49610 35d4c2124073
debug: add an option to display statistic about a bundling operation This will helps a lot to understand how the bundling decision might impact pull/unbundle on the other side.
hgext/remotefilelog/remotefilelog.py
hgext/sqlitestore.py
mercurial/changegroup.py
mercurial/configitems.py
mercurial/filelog.py
mercurial/manifest.py
mercurial/revlog.py
mercurial/utils/storageutil.py
tests/test-bundle.t
--- a/hgext/remotefilelog/remotefilelog.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/hgext/remotefilelog/remotefilelog.py	Tue Nov 15 16:25:23 2022 +0100
@@ -299,6 +299,7 @@
         deltaprevious=False,
         deltamode=None,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         # we don't use any of these parameters here
         del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious
--- a/hgext/sqlitestore.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/hgext/sqlitestore.py	Tue Nov 15 16:25:23 2022 +0100
@@ -589,6 +589,7 @@
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         if nodesorder not in (b'nodes', b'storage', b'linear', None):
             raise error.ProgrammingError(
--- a/mercurial/changegroup.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/mercurial/changegroup.py	Tue Nov 15 16:25:23 2022 +0100
@@ -869,6 +869,7 @@
     fullclnodes=None,
     precomputedellipsis=None,
     sidedata_helpers=None,
+    debug_info=None,
 ):
     """Calculate deltas for a set of revisions.
 
@@ -978,6 +979,7 @@
         assumehaveparentrevisions=not ellipses,
         deltamode=deltamode,
         sidedata_helpers=sidedata_helpers,
+        debug_info=debug_info,
     )
 
     for i, revision in enumerate(revisions):
@@ -1003,6 +1005,187 @@
         progress.complete()
 
 
+def make_debug_info():
+    """ "build a "new" debug_info dictionnary
+
+    That dictionnary can be used to gather information about the bundle process
+    """
+    return {
+        'revision-total': 0,
+        'revision-changelog': 0,
+        'revision-manifest': 0,
+        'revision-files': 0,
+        'file-count': 0,
+        'merge-total': 0,
+        'available-delta': 0,
+        'available-full': 0,
+        'delta-against-prev': 0,
+        'delta-full': 0,
+        'delta-against-p1': 0,
+        'denied-delta-candeltafn': 0,
+        'denied-base-not-available': 0,
+        'reused-storage-delta': 0,
+        'computed-delta': 0,
+    }
+
+
+def merge_debug_info(base, other):
+    """merge the debug information from <other> into <base>
+
+    This function can be used to gather lower level information into higher level ones.
+    """
+    for key in (
+        'revision-total',
+        'revision-changelog',
+        'revision-manifest',
+        'revision-files',
+        'merge-total',
+        'available-delta',
+        'available-full',
+        'delta-against-prev',
+        'delta-full',
+        'delta-against-p1',
+        'denied-delta-candeltafn',
+        'denied-base-not-available',
+        'reused-storage-delta',
+        'computed-delta',
+    ):
+        base[key] += other[key]
+
+
+_KEY_PART_WIDTH = 17
+
+
+def _dbg_bdl_line(
+    ui,
+    indent,
+    key,
+    base_value=None,
+    percentage_base=None,
+    percentage_key=None,
+    percentage_ref=None,
+    extra=None,
+):
+    """Print one line of debug_bundle_debug_info"""
+    line = b"DEBUG-BUNDLING: "
+    line += b' ' * (2 * indent)
+    key += b":"
+    if base_value is not None:
+        assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
+        line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
+        line += b"%10d" % base_value
+    else:
+        line += key
+
+    if percentage_base is not None:
+        assert base_value is not None
+        percentage = base_value * 100 // percentage_base
+        if percentage_key is not None:
+            line += b" (%d%% of %s %d)" % (
+                percentage,
+                percentage_key,
+                percentage_ref,
+            )
+        else:
+            line += b" (%d%%)" % percentage
+
+    if extra:
+        line += b" "
+        line += extra
+
+    line += b'\n'
+    ui.write_err(line)
+
+
+def display_bundling_debug_info(
+    ui,
+    debug_info,
+    cl_debug_info,
+    mn_debug_info,
+    fl_debug_info,
+):
+    """display debug information gathered during a bundling through `ui`"""
+    d = debug_info
+    c = cl_debug_info
+    m = mn_debug_info
+    f = fl_debug_info
+    all_info = [
+        (b"changelog", b"cl", c),
+        (b"manifests", b"mn", m),
+        (b"files", b"fl", f),
+    ]
+    _dbg_bdl_line(ui, 0, b'revisions', d['revision-total'])
+    _dbg_bdl_line(ui, 1, b'changelog', d['revision-changelog'])
+    _dbg_bdl_line(ui, 1, b'manifest', d['revision-manifest'])
+    extra = b'(for %d revlogs)' % d['file-count']
+    _dbg_bdl_line(ui, 1, b'files', d['revision-files'], extra=extra)
+    if d['merge-total']:
+        _dbg_bdl_line(ui, 1, b'merge', d['merge-total'], d['revision-total'])
+    for k, __, v in all_info:
+        if v['merge-total']:
+            _dbg_bdl_line(ui, 2, k, v['merge-total'], v['revision-total'])
+
+    _dbg_bdl_line(ui, 0, b'deltas')
+    _dbg_bdl_line(
+        ui,
+        1,
+        b'from-storage',
+        d['reused-storage-delta'],
+        percentage_base=d['available-delta'],
+        percentage_key=b"available",
+        percentage_ref=d['available-delta'],
+    )
+
+    if d['denied-delta-candeltafn']:
+        _dbg_bdl_line(ui, 2, b'denied-fn', d['denied-delta-candeltafn'])
+    for __, k, v in all_info:
+        if v['denied-delta-candeltafn']:
+            _dbg_bdl_line(ui, 3, k, v['denied-delta-candeltafn'])
+
+    if d['denied-base-not-available']:
+        _dbg_bdl_line(ui, 2, b'denied-nb', d['denied-base-not-available'])
+    for k, __, v in all_info:
+        if v['denied-base-not-available']:
+            _dbg_bdl_line(ui, 3, k, v['denied-base-not-available'])
+
+    if d['computed-delta']:
+        _dbg_bdl_line(ui, 1, b'computed', d['computed-delta'])
+
+    if d['available-full']:
+        _dbg_bdl_line(
+            ui,
+            2,
+            b'full',
+            d['delta-full'],
+            percentage_base=d['available-full'],
+            percentage_key=b"native",
+            percentage_ref=d['available-full'],
+        )
+    for k, __, v in all_info:
+        if v['available-full']:
+            _dbg_bdl_line(
+                ui,
+                3,
+                k,
+                v['delta-full'],
+                percentage_base=v['available-full'],
+                percentage_key=b"native",
+                percentage_ref=v['available-full'],
+            )
+
+    if d['delta-against-prev']:
+        _dbg_bdl_line(ui, 2, b'previous', d['delta-against-prev'])
+    for k, __, v in all_info:
+        if v['delta-against-prev']:
+            _dbg_bdl_line(ui, 3, k, v['delta-against-prev'])
+
+    if d['delta-against-p1']:
+        _dbg_bdl_line(ui, 2, b'parent-1', d['delta-against-prev'])
+    for k, __, v in all_info:
+        if v['delta-against-p1']:
+            _dbg_bdl_line(ui, 3, k, v['delta-against-p1'])
+
+
 class cgpacker:
     def __init__(
         self,
@@ -1086,13 +1269,21 @@
             self._verbosenote = lambda s: None
 
     def generate(
-        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
+        self,
+        commonrevs,
+        clnodes,
+        fastpathlinkrev,
+        source,
+        changelog=True,
     ):
         """Yield a sequence of changegroup byte chunks.
         If changelog is False, changelog data won't be added to changegroup
         """
 
+        debug_info = None
         repo = self._repo
+        if repo.ui.configbool(b'debug', b'bundling-stats'):
+            debug_info = make_debug_info()
         cl = repo.changelog
 
         self._verbosenote(_(b'uncompressed size of bundle content:\n'))
@@ -1107,14 +1298,19 @@
                 # correctly advertise its sidedata categories directly.
                 remote_sidedata = repo._wanted_sidedata
             sidedata_helpers = sidedatamod.get_sidedata_helpers(
-                repo, remote_sidedata
+                repo,
+                remote_sidedata,
             )
 
+        cl_debug_info = None
+        if debug_info is not None:
+            cl_debug_info = make_debug_info()
         clstate, deltas = self._generatechangelog(
             cl,
             clnodes,
             generate=changelog,
             sidedata_helpers=sidedata_helpers,
+            debug_info=cl_debug_info,
         )
         for delta in deltas:
             for chunk in _revisiondeltatochunks(
@@ -1126,6 +1322,9 @@
         close = closechunk()
         size += len(close)
         yield closechunk()
+        if debug_info is not None:
+            merge_debug_info(debug_info, cl_debug_info)
+            debug_info['revision-changelog'] = cl_debug_info['revision-total']
 
         self._verbosenote(_(b'%8.i (changelog)\n') % size)
 
@@ -1133,6 +1332,9 @@
         manifests = clstate[b'manifests']
         changedfiles = clstate[b'changedfiles']
 
+        if debug_info is not None:
+            debug_info['file-count'] = len(changedfiles)
+
         # We need to make sure that the linkrev in the changegroup refers to
         # the first changeset that introduced the manifest or file revision.
         # The fastpath is usually safer than the slowpath, because the filelogs
@@ -1156,6 +1358,9 @@
         fnodes = {}  # needed file nodes
 
         size = 0
+        mn_debug_info = None
+        if debug_info is not None:
+            mn_debug_info = make_debug_info()
         it = self.generatemanifests(
             commonrevs,
             clrevorder,
@@ -1165,6 +1370,7 @@
             source,
             clstate[b'clrevtomanifestrev'],
             sidedata_helpers=sidedata_helpers,
+            debug_info=mn_debug_info,
         )
 
         for tree, deltas in it:
@@ -1185,6 +1391,9 @@
             close = closechunk()
             size += len(close)
             yield close
+        if debug_info is not None:
+            merge_debug_info(debug_info, mn_debug_info)
+            debug_info['revision-manifest'] = mn_debug_info['revision-total']
 
         self._verbosenote(_(b'%8.i (manifests)\n') % size)
         yield self._manifestsend
@@ -1199,6 +1408,9 @@
         manifests.clear()
         clrevs = {cl.rev(x) for x in clnodes}
 
+        fl_debug_info = None
+        if debug_info is not None:
+            fl_debug_info = make_debug_info()
         it = self.generatefiles(
             changedfiles,
             commonrevs,
@@ -1208,6 +1420,7 @@
             fnodes,
             clrevs,
             sidedata_helpers=sidedata_helpers,
+            debug_info=fl_debug_info,
         )
 
         for path, deltas in it:
@@ -1230,12 +1443,29 @@
             self._verbosenote(_(b'%8.i  %s\n') % (size, path))
 
         yield closechunk()
+        if debug_info is not None:
+            merge_debug_info(debug_info, fl_debug_info)
+            debug_info['revision-files'] = fl_debug_info['revision-total']
+
+        if debug_info is not None:
+            display_bundling_debug_info(
+                repo.ui,
+                debug_info,
+                cl_debug_info,
+                mn_debug_info,
+                fl_debug_info,
+            )
 
         if clnodes:
             repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
 
     def _generatechangelog(
-        self, cl, nodes, generate=True, sidedata_helpers=None
+        self,
+        cl,
+        nodes,
+        generate=True,
+        sidedata_helpers=None,
+        debug_info=None,
     ):
         """Generate data for changelog chunks.
 
@@ -1332,6 +1562,7 @@
             fullclnodes=self._fullclnodes,
             precomputedellipsis=self._precomputedellipsis,
             sidedata_helpers=sidedata_helpers,
+            debug_info=debug_info,
         )
 
         return state, gen
@@ -1346,6 +1577,7 @@
         source,
         clrevtolocalrev,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         """Returns an iterator of changegroup chunks containing manifests.
 
@@ -1444,6 +1676,7 @@
                 fullclnodes=self._fullclnodes,
                 precomputedellipsis=self._precomputedellipsis,
                 sidedata_helpers=sidedata_helpers,
+                debug_info=debug_info,
             )
 
             if not self._oldmatcher.visitdir(store.tree[:-1]):
@@ -1483,6 +1716,7 @@
         fnodes,
         clrevs,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         changedfiles = [
             f
@@ -1578,6 +1812,7 @@
                 fullclnodes=self._fullclnodes,
                 precomputedellipsis=self._precomputedellipsis,
                 sidedata_helpers=sidedata_helpers,
+                debug_info=debug_info,
             )
 
             yield fname, deltas
@@ -1867,7 +2102,12 @@
 
 
 def makechangegroup(
-    repo, outgoing, version, source, fastpath=False, bundlecaps=None
+    repo,
+    outgoing,
+    version,
+    source,
+    fastpath=False,
+    bundlecaps=None,
 ):
     cgstream = makestream(
         repo,
@@ -1917,7 +2157,12 @@
 
     repo.hook(b'preoutgoing', throw=True, source=source)
     _changegroupinfo(repo, csets, source)
-    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
+    return bundler.generate(
+        commonrevs,
+        csets,
+        fastpathlinkrev,
+        source,
+    )
 
 
 def _addchangegroupfiles(
--- a/mercurial/configitems.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/mercurial/configitems.py	Tue Nov 15 16:25:23 2022 +0100
@@ -588,6 +588,12 @@
     b'revlog.debug-delta',
     default=False,
 )
+# display extra information about the bundling process
+coreconfigitem(
+    b'debug',
+    b'bundling-stats',
+    default=False,
+)
 coreconfigitem(
     b'defaults',
     b'.*',
--- a/mercurial/filelog.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/mercurial/filelog.py	Tue Nov 15 16:25:23 2022 +0100
@@ -111,6 +111,7 @@
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         return self._revlog.emitrevisions(
             nodes,
@@ -119,6 +120,7 @@
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltamode=deltamode,
             sidedata_helpers=sidedata_helpers,
+            debug_info=debug_info,
         )
 
     def addrevision(
--- a/mercurial/manifest.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/mercurial/manifest.py	Tue Nov 15 16:25:23 2022 +0100
@@ -1836,6 +1836,7 @@
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         return self._revlog.emitrevisions(
             nodes,
@@ -1844,6 +1845,7 @@
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltamode=deltamode,
             sidedata_helpers=sidedata_helpers,
+            debug_info=debug_info,
         )
 
     def addgroup(
--- a/mercurial/revlog.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/mercurial/revlog.py	Tue Nov 15 16:25:23 2022 +0100
@@ -2889,6 +2889,7 @@
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
         sidedata_helpers=None,
+        debug_info=None,
     ):
         if nodesorder not in (b'nodes', b'storage', b'linear', None):
             raise error.ProgrammingError(
@@ -2918,6 +2919,7 @@
             revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
             sidedata_helpers=sidedata_helpers,
+            debug_info=debug_info,
         )
 
     DELTAREUSEALWAYS = b'always'
--- a/mercurial/utils/storageutil.py	Tue Nov 15 18:08:56 2022 +0100
+++ b/mercurial/utils/storageutil.py	Tue Nov 15 16:25:23 2022 +0100
@@ -305,6 +305,7 @@
     revisiondata=False,
     assumehaveparentrevisions=False,
     sidedata_helpers=None,
+    debug_info=None,
 ):
     """Generic implementation of ifiledata.emitrevisions().
 
@@ -370,6 +371,10 @@
     ``sidedata_helpers`` (optional)
         If not None, means that sidedata should be included.
         See `revlogutil.sidedata.get_sidedata_helpers`.
+
+    ``debug_info`
+        An optionnal dictionnary to gather information about the bundling
+        process (if present, see config: debug.bundling.stats.
     """
 
     fnode = store.node
@@ -395,23 +400,43 @@
         if rev == nullrev:
             continue
 
+        debug_delta_source = None
+        if debug_info is not None:
+            debug_info['revision-total'] += 1
+
         node = fnode(rev)
         p1rev, p2rev = store.parentrevs(rev)
 
+        if debug_info is not None:
+            if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
+                debug_info['merge-total'] += 1
+
         if deltaparentfn:
             deltaparentrev = deltaparentfn(rev)
+            if debug_info is not None:
+                if deltaparentrev == nullrev:
+                    debug_info['available-full'] += 1
+                else:
+                    debug_info['available-delta'] += 1
+
         else:
             deltaparentrev = nullrev
 
         # Forced delta against previous mode.
         if deltamode == repository.CG_DELTAMODE_PREV:
+            if debug_info is not None:
+                debug_delta_source = "prev"
             baserev = prevrev
 
         # We're instructed to send fulltext. Honor that.
         elif deltamode == repository.CG_DELTAMODE_FULL:
+            if debug_info is not None:
+                debug_delta_source = "full"
             baserev = nullrev
         # We're instructed to use p1. Honor that
         elif deltamode == repository.CG_DELTAMODE_P1:
+            if debug_info is not None:
+                debug_delta_source = "p1"
             baserev = p1rev
 
         # There is a delta in storage. We try to use that because it
@@ -421,20 +446,29 @@
             # Base revision was already emitted in this group. We can
             # always safely use the delta.
             if deltaparentrev in available:
+                if debug_info is not None:
+                    debug_delta_source = "storage"
                 baserev = deltaparentrev
 
             # Base revision is a parent that hasn't been emitted already.
             # Use it if we can assume the receiver has the parent revision.
             elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
+                if debug_info is not None:
+                    debug_delta_source = "storage"
                 baserev = deltaparentrev
-
             # No guarantee the receiver has the delta parent. Send delta
             # against last revision (if possible), which in the common case
             # should be similar enough to this revision that the delta is
             # reasonable.
             elif prevrev is not None:
+                if debug_info is not None:
+                    debug_info['denied-base-not-available'] += 1
+                    debug_delta_source = "prev"
                 baserev = prevrev
             else:
+                if debug_info is not None:
+                    debug_info['denied-base-not-available'] += 1
+                    debug_delta_source = "full"
                 baserev = nullrev
 
         # Storage has a fulltext revision.
@@ -442,13 +476,24 @@
         # Let's use the previous revision, which is as good a guess as any.
         # There is definitely room to improve this logic.
         elif prevrev is not None:
+            if debug_info is not None:
+                debug_delta_source = "prev"
             baserev = prevrev
         else:
+            if debug_info is not None:
+                debug_delta_source = "full"
             baserev = nullrev
 
         # But we can't actually use our chosen delta base for whatever
         # reason. Reset to fulltext.
-        if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
+        if (
+            baserev != nullrev
+            and candeltafn is not None
+            and not candeltafn(baserev, rev)
+        ):
+            if debug_info is not None:
+                debug_delta_source = "full"
+                debug_info['denied-delta-candeltafn'] += 1
             baserev = nullrev
 
         revision = None
@@ -460,6 +505,9 @@
                 try:
                     revision = store.rawdata(node)
                 except error.CensoredNodeError as e:
+                    if debug_info is not None:
+                        debug_delta_source = "full"
+                        debug_info['denied-delta-not-available'] += 1
                     revision = e.tombstone
 
                 if baserev != nullrev:
@@ -471,12 +519,46 @@
             elif (
                 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
             ):
+                if debug_info is not None:
+                    debug_info['computed-delta'] += 1  # close enough
+                    debug_info['delta-full'] += 1
                 revision = store.rawdata(node)
                 available.add(rev)
             else:
                 if revdifffn:
+                    if debug_info is not None:
+                        if debug_delta_source == "full":
+                            debug_info['computed-delta'] += 1
+                            debug_info['delta-full'] += 1
+                        elif debug_delta_source == "prev":
+                            debug_info['computed-delta'] += 1
+                            debug_info['delta-against-prev'] += 1
+                        elif debug_delta_source == "p1":
+                            debug_info['computed-delta'] += 1
+                            debug_info['delta-against-p1'] += 1
+                        elif debug_delta_source == "storage":
+                            debug_info['reused-storage-delta'] += 1
+                        else:
+                            assert False, 'unreachable'
+
                     delta = revdifffn(baserev, rev)
                 else:
+                    if debug_info is not None:
+                        if debug_delta_source == "full":
+                            debug_info['computed-delta'] += 1
+                            debug_info['delta-full'] += 1
+                        elif debug_delta_source == "prev":
+                            debug_info['computed-delta'] += 1
+                            debug_info['delta-against-prev'] += 1
+                        elif debug_delta_source == "p1":
+                            debug_info['computed-delta'] += 1
+                            debug_info['delta-against-p1'] += 1
+                        elif debug_delta_source == "storage":
+                            # seem quite unlikelry to happens
+                            debug_info['computed-delta'] += 1
+                            debug_info['reused-storage-delta'] += 1
+                        else:
+                            assert False, 'unreachable'
                     delta = mdiff.textdiff(
                         store.rawdata(baserev), store.rawdata(rev)
                     )
--- a/tests/test-bundle.t	Tue Nov 15 18:08:56 2022 +0100
+++ b/tests/test-bundle.t	Tue Nov 15 16:25:23 2022 +0100
@@ -1039,6 +1039,24 @@
   $ hg bundle -a --config devel.bundle.delta=full ./full.hg
   3 changesets found
 
+
+Test the debug statistic when building a bundle
+-----------------------------------------------
+
+  $ hg bundle -a ./default.hg --config debug.bundling-stats=yes
+  3 changesets found
+  DEBUG-BUNDLING: revisions:                9
+  DEBUG-BUNDLING:   changelog:              3
+  DEBUG-BUNDLING:   manifest:               3
+  DEBUG-BUNDLING:   files:                  3 (for 3 revlogs)
+  DEBUG-BUNDLING: deltas:
+  DEBUG-BUNDLING:   from-storage:           2 (100% of available 2)
+  DEBUG-BUNDLING:   computed:               7
+  DEBUG-BUNDLING:     full:                 7 (100% of native 7)
+  DEBUG-BUNDLING:       changelog:          3 (100% of native 3)
+  DEBUG-BUNDLING:       manifests:          1 (100% of native 1)
+  DEBUG-BUNDLING:       files:              3 (100% of native 3)
+
 Test the debug output when applying delta
 -----------------------------------------