changeset 30461:932b18c95e11

commands: print chunk type in debugrevlog Each data entry ("chunk") in a revlog has a type based on the first byte of the data. This type indicates how to interpret the data. This seems like a useful thing to be able to query through a debug command. So let's add that to `hg debugrevlog`. This does make `hg debugrevlog` slightly slower, as it has to read more than just the index. However, even on the mozilla-unified manifest (which is ~200MB spread over ~350K revisions), this takes <400ms.
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 17 Nov 2016 20:30:00 -0800
parents 94ca0e13d1fc
children 2e736f01a710
files mercurial/commands.py tests/test-debugcommands.t
diffstat 2 files changed, 40 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/commands.py	Thu Nov 17 20:17:51 2016 -0800
+++ b/mercurial/commands.py	Thu Nov 17 20:30:00 2016 -0800
@@ -15,6 +15,7 @@
 import re
 import shlex
 import socket
+import string
 import sys
 import tempfile
 import time
@@ -3194,6 +3195,8 @@
     datasize = [None, 0, 0]
     fullsize = [None, 0, 0]
     deltasize = [None, 0, 0]
+    chunktypecounts = {}
+    chunktypesizes = {}
 
     def addsize(size, l):
         if l[0] is None or size < l[0]:
@@ -3231,6 +3234,20 @@
             elif delta != nullrev:
                 numother += 1
 
+        # Obtain data on the raw chunks in the revlog.
+        chunk = r._chunkraw(rev, rev)[1]
+        if chunk:
+            chunktype = chunk[0]
+        else:
+            chunktype = 'empty'
+
+        if chunktype not in chunktypecounts:
+            chunktypecounts[chunktype] = 0
+            chunktypesizes[chunktype] = 0
+
+        chunktypecounts[chunktype] += 1
+        chunktypesizes[chunktype] += size
+
     # Adjust size min value for empty cases
     for size in (datasize, fullsize, deltasize):
         if size[0] is None:
@@ -3282,6 +3299,24 @@
     ui.write(('    full      : ') + fmt % pcfmt(fulltotal, totalsize))
     ui.write(('    deltas    : ') + fmt % pcfmt(deltatotal, totalsize))
 
+    def fmtchunktype(chunktype):
+        if chunktype == 'empty':
+            return '    %s     : ' % chunktype
+        elif chunktype in string.ascii_letters:
+            return '    0x%s (%s)  : ' % (hex(chunktype), chunktype)
+        else:
+            return '    0x%s      : ' % hex(chunktype)
+
+    ui.write('\n')
+    ui.write(('chunks        : ') + fmt2 % numrevs)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
+    ui.write(('chunks size   : ') + fmt2 % totalsize)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
+
     ui.write('\n')
     fmt = dfmtstr(max(avgchainlen, compratio))
     ui.write(('avg chain length  : ') + fmt % avgchainlen)
--- a/tests/test-debugcommands.t	Thu Nov 17 20:17:51 2016 -0800
+++ b/tests/test-debugcommands.t	Thu Nov 17 20:30:00 2016 -0800
@@ -22,6 +22,11 @@
       full      : 44 (100.00%)
       deltas    :  0 ( 0.00%)
   
+  chunks        :  1
+      0x75 (u)  :  1 (100.00%)
+  chunks size   : 44
+      0x75 (u)  : 44 (100.00%)
+  
   avg chain length  : 0
   max chain length  : 0
   compression ratio : 0