changeset 49819:b1e4c74beb6f

debug: add debug-revlog-stats command. Display statistics about revlogs in the store. Useful to get an approximate size of a repository, etc. More statistics will be added in the future.
author Franck Bret <franck.bret@octobus.net>
date Mon, 19 Dec 2022 16:22:01 +0100
parents 3fd5824f1177
children 3eac92509484
files mercurial/debugcommands.py mercurial/revlogutils/debug.py tests/test-completion.t tests/test-debug-revlog-stats.t tests/test-help.t
diffstat 5 files changed, 166 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/debugcommands.py	Fri Dec 16 22:24:05 2022 -0500
+++ b/mercurial/debugcommands.py	Mon Dec 19 16:22:01 2022 +0100
@@ -3809,6 +3809,33 @@
 
 
 @command(
+    b'debug-revlog-stats',
+    [
+        (b'c', b'changelog', None, _(b'Display changelog statistics')),
+        (b'm', b'manifest', None, _(b'Display manifest statistics')),
+        (b'f', b'filelogs', None, _(b'Display filelogs statistics')),
+    ]
+    + cmdutil.formatteropts,
+)
+def debug_revlog_stats(ui, repo, **opts):
+    """display statistics about revlogs in the store"""
+    # NOTE: the docstring above is the summary line listed by `hg help`.
+    opts = pycompat.byteskwargs(opts)
+    kinds = (b'changelog', b'manifest', b'filelogs')
+    changelog, manifest, filelogs = (opts[k] for k in kinds)
+
+    # None of -c/-m/-f was passed: report on every kind of revlog.
+    if all(flag is None for flag in (changelog, manifest, filelogs)):
+        changelog = manifest = filelogs = True
+
+    # Statistics are computed against the unfiltered view of the repo.
+    repo = repo.unfiltered()
+    # Leaving the block without an exception finalizes the formatter.
+    with ui.formatter(b'debug-revlog-stats', opts) as fm:
+        revlog_debug.debug_revlog_stats(repo, fm, changelog, manifest, filelogs)
+
+
+@command(
     b'debugsuccessorssets',
     [(b'', b'closest', False, _(b'return closest successors sets only'))],
     _(b'[REV]'),
--- a/mercurial/revlogutils/debug.py	Fri Dec 16 22:24:05 2022 -0500
+++ b/mercurial/revlogutils/debug.py	Mon Dec 19 16:22:01 2022 +0100
@@ -661,3 +661,61 @@
 
     fh = revlog._datafp()
     deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
+
+
+def _get_revlogs(repo, changelog: bool, manifest: bool, filelogs: bool):
+    """yield the requested revlogs: changelog, root manifest, then filelogs"""
+    if changelog:
+        yield repo.changelog
+
+    if manifest:
+        # XXX: Handle tree manifest
+        root_storage = repo.manifestlog.getstorage(b'')
+        assert not root_storage._treeondisk
+        yield root_storage._revlog
+
+    if not filelogs:
+        return
+    # Gather every path reported by files() on any changeset of `repo`.
+    touched = set()
+    for rev in repo:
+        touched.update(repo[rev].files())
+    for path in sorted(touched):
+        yield repo.file(path)._revlog
+
+
+def debug_revlog_stats(
+    repo, fm, changelog: bool, manifest: bool, filelogs: bool
+):
+    """Emit one row of statistics per selected revlog through `fm`
+
+    fm: the output formatter.
+    changelog, manifest, filelogs: which kinds of revlog to report on.
+    """
+    fm.plain(b'rev-count   data-size inl type      target \n')
+
+    for rl in _get_revlogs(repo, changelog, manifest, filelogs):
+        fm.startitem()
+        rev_count = len(rl)
+        is_inline = rl._inline
+        # The offset obtained for the last revision is reported as data size.
+        size = rl._get_data_offset(rev_count - 1)
+
+        # rl.target is read as (kind, name); the changelog row shows no name.
+        target = rl.target
+        kind = target[0]
+        if kind == constants.KIND_CHANGELOG:
+            kind_name, name = b'changelog', b''
+        elif kind == constants.KIND_MANIFESTLOG:
+            kind_name, name = b'manifest', target[1]
+        elif kind == constants.KIND_FILELOG:
+            kind_name, name = b'file', target[1]
+        else:
+            kind_name, name = b'unknown', b''
+
+        fm.write(b'revlog.rev-count', b'%9d', rev_count)
+        fm.write(b'revlog.data-size', b'%12d', size)
+        fm.write(b'revlog.inline', b' %-3s', b'yes' if is_inline else b'no')
+        fm.write(b'revlog.type', b' %-9s', kind_name)
+        fm.write(b'revlog.target', b' %s', name)
+        fm.plain(b'\n')
--- a/tests/test-completion.t	Fri Dec 16 22:24:05 2022 -0500
+++ b/tests/test-completion.t	Mon Dec 19 16:22:01 2022 +0100
@@ -77,6 +77,7 @@
   debug-delta-find
   debug-repair-issue6528
   debug-revlog-index
+  debug-revlog-stats
   debugancestor
   debugantivirusrunning
   debugapplystreamclonebundle
@@ -271,6 +272,7 @@
   debug-delta-find: changelog, manifest, dir, template, source
   debug-repair-issue6528: to-report, from-report, paranoid, dry-run
   debug-revlog-index: changelog, manifest, dir, template
+  debug-revlog-stats: changelog, manifest, filelogs, template
   debugancestor: 
   debugantivirusrunning: 
   debugapplystreamclonebundle: 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-debug-revlog-stats.t	Mon Dec 19 16:22:01 2022 +0100
@@ -0,0 +1,77 @@
+Force revlog max inline value to be smaller than default
+
+  $ mkdir $TESTTMP/ext
+  $ cat << EOF > $TESTTMP/ext/small_inline.py
+  > from mercurial import revlog
+  > revlog._maxinline = 8
+  > EOF
+
+  $ cat << EOF >> $HGRCPATH
+  > [extensions]
+  > small_inline=$TESTTMP/ext/small_inline.py
+  > EOF
+
+  $ hg init repo
+  $ cd repo
+
+Try on an empty repository
+
+  $ hg debug-revlog-stats
+  rev-count   data-size inl type      target 
+          0           0 yes changelog 
+          0           0 yes manifest  
+
+  $ mkdir folder
+  $ touch a b folder/c folder/d
+  $ hg commit -Aqm 0
+  $ echo "text" > a
+  $ hg rm b
+  $ echo "longer string" > folder/d
+  $ hg commit -Aqm 1
+
+Differences in data size observed with pure are due to different compression
+algorithms
+
+  $ hg debug-revlog-stats
+  rev-count   data-size inl type      target 
+          2         138 no  changelog  (no-pure !)
+          2         137 no  changelog  (pure !)
+          2         177 no  manifest   (no-pure !)
+          2         168 no  manifest   (pure !)
+          2           6 yes file      a
+          1           0 yes file      b
+          1           0 yes file      folder/c
+          2          15 no  file      folder/d
+
+Test 'changelog' command argument
+
+  $ hg debug-revlog-stats -c
+  rev-count   data-size inl type      target 
+          2         138 no  changelog  (no-pure !)
+          2         137 no  changelog  (pure !)
+
+Test 'manifest' command argument
+
+  $ hg debug-revlog-stats -m
+  rev-count   data-size inl type      target 
+          2         177 no  manifest   (no-pure !)
+          2         168 no  manifest   (pure !)
+
+Test 'filelogs' command argument
+
+  $ hg debug-revlog-stats -f
+  rev-count   data-size inl type      target 
+          2           6 yes file      a
+          1           0 yes file      b
+          1           0 yes file      folder/c
+          2          15 no  file      folder/d
+
+Test multiple command arguments
+
+  $ hg debug-revlog-stats -cm
+  rev-count   data-size inl type      target 
+          2         138 no  changelog  (no-pure !)
+          2         137 no  changelog  (pure !)
+          2         177 no  manifest   (no-pure !)
+          2         168 no  manifest   (pure !)
+
--- a/tests/test-help.t	Fri Dec 16 22:24:05 2022 -0500
+++ b/tests/test-help.t	Mon Dec 19 16:22:01 2022 +0100
@@ -985,6 +985,8 @@
                  details.
    debug-revlog-index
                  dump index data for a revlog
+   debug-revlog-stats
+                 display statistics about revlogs in the store
    debugancestor
                  find the ancestor revision of two revisions in a given index
    debugantivirusrunning