debug-revlog: move the code into the revlogutils module
We have a module dedicated to debug code; let us use it.
--- a/mercurial/debugcommands.py Mon Nov 07 14:13:59 2022 -0500
+++ b/mercurial/debugcommands.py Mon Nov 07 14:24:52 2022 -0500
@@ -21,7 +21,6 @@
import socket
import ssl
import stat
-import string
import subprocess
import sys
import time
@@ -3241,303 +3240,9 @@
if opts.get(b"dump"):
revlog_debug.dump(ui, r)
- return 0
-
- format = r._format_version
- v = r._format_flags
- flags = []
- gdelta = False
- if v & revlog.FLAG_INLINE_DATA:
- flags.append(b'inline')
- if v & revlog.FLAG_GENERALDELTA:
- gdelta = True
- flags.append(b'generaldelta')
- if not flags:
- flags = [b'(none)']
-
- ### tracks merge vs single parent
- nummerges = 0
-
- ### tracks ways the "delta" are build
- # nodelta
- numempty = 0
- numemptytext = 0
- numemptydelta = 0
- # full file content
- numfull = 0
- # intermediate snapshot against a prior snapshot
- numsemi = 0
- # snapshot count per depth
- numsnapdepth = collections.defaultdict(lambda: 0)
- # delta against previous revision
- numprev = 0
- # delta against first or second parent (not prev)
- nump1 = 0
- nump2 = 0
- # delta against neither prev nor parents
- numother = 0
- # delta against prev that are also first or second parent
- # (details of `numprev`)
- nump1prev = 0
- nump2prev = 0
-
- # data about delta chain of each revs
- chainlengths = []
- chainbases = []
- chainspans = []
-
- # data about each revision
- datasize = [None, 0, 0]
- fullsize = [None, 0, 0]
- semisize = [None, 0, 0]
- # snapshot count per depth
- snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
- deltasize = [None, 0, 0]
- chunktypecounts = {}
- chunktypesizes = {}
-
- def addsize(size, l):
- if l[0] is None or size < l[0]:
- l[0] = size
- if size > l[1]:
- l[1] = size
- l[2] += size
-
- numrevs = len(r)
- for rev in range(numrevs):
- p1, p2 = r.parentrevs(rev)
- delta = r.deltaparent(rev)
- if format > 0:
- addsize(r.rawsize(rev), datasize)
- if p2 != nullrev:
- nummerges += 1
- size = r.length(rev)
- if delta == nullrev:
- chainlengths.append(0)
- chainbases.append(r.start(rev))
- chainspans.append(size)
- if size == 0:
- numempty += 1
- numemptytext += 1
- else:
- numfull += 1
- numsnapdepth[0] += 1
- addsize(size, fullsize)
- addsize(size, snapsizedepth[0])
- else:
- chainlengths.append(chainlengths[delta] + 1)
- baseaddr = chainbases[delta]
- revaddr = r.start(rev)
- chainbases.append(baseaddr)
- chainspans.append((revaddr - baseaddr) + size)
- if size == 0:
- numempty += 1
- numemptydelta += 1
- elif r.issnapshot(rev):
- addsize(size, semisize)
- numsemi += 1
- depth = r.snapshotdepth(rev)
- numsnapdepth[depth] += 1
- addsize(size, snapsizedepth[depth])
- else:
- addsize(size, deltasize)
- if delta == rev - 1:
- numprev += 1
- if delta == p1:
- nump1prev += 1
- elif delta == p2:
- nump2prev += 1
- elif delta == p1:
- nump1 += 1
- elif delta == p2:
- nump2 += 1
- elif delta != nullrev:
- numother += 1
-
- # Obtain data on the raw chunks in the revlog.
- if util.safehasattr(r, b'_getsegmentforrevs'):
- segment = r._getsegmentforrevs(rev, rev)[1]
- else:
- segment = r._revlog._getsegmentforrevs(rev, rev)[1]
- if segment:
- chunktype = bytes(segment[0:1])
- else:
- chunktype = b'empty'
-
- if chunktype not in chunktypecounts:
- chunktypecounts[chunktype] = 0
- chunktypesizes[chunktype] = 0
-
- chunktypecounts[chunktype] += 1
- chunktypesizes[chunktype] += size
-
- # Adjust size min value for empty cases
- for size in (datasize, fullsize, semisize, deltasize):
- if size[0] is None:
- size[0] = 0
-
- numdeltas = numrevs - numfull - numempty - numsemi
- numoprev = numprev - nump1prev - nump2prev
- totalrawsize = datasize[2]
- datasize[2] /= numrevs
- fulltotal = fullsize[2]
- if numfull == 0:
- fullsize[2] = 0
else:
- fullsize[2] /= numfull
- semitotal = semisize[2]
- snaptotal = {}
- if numsemi > 0:
- semisize[2] /= numsemi
- for depth in snapsizedepth:
- snaptotal[depth] = snapsizedepth[depth][2]
- snapsizedepth[depth][2] /= numsnapdepth[depth]
-
- deltatotal = deltasize[2]
- if numdeltas > 0:
- deltasize[2] /= numdeltas
- totalsize = fulltotal + semitotal + deltatotal
- avgchainlen = sum(chainlengths) / numrevs
- maxchainlen = max(chainlengths)
- maxchainspan = max(chainspans)
- compratio = 1
- if totalsize:
- compratio = totalrawsize / totalsize
-
- basedfmtstr = b'%%%dd\n'
- basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
-
- def dfmtstr(max):
- return basedfmtstr % len(str(max))
-
- def pcfmtstr(max, padding=0):
- return basepcfmtstr % (len(str(max)), b' ' * padding)
-
- def pcfmt(value, total):
- if total:
- return (value, 100 * float(value) / total)
- else:
- return value, 100.0
-
- ui.writenoi18n(b'format : %d\n' % format)
- ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
-
- ui.write(b'\n')
- fmt = pcfmtstr(totalsize)
- fmt2 = dfmtstr(totalsize)
- ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
- ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
- ui.writenoi18n(
- b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
- )
- ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
- ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
- ui.writenoi18n(
- b' text : '
- + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
- )
- ui.writenoi18n(
- b' delta : '
- + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
- )
- ui.writenoi18n(
- b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
- )
- for depth in sorted(numsnapdepth):
- ui.write(
- (b' lvl-%-3d : ' % depth)
- + fmt % pcfmt(numsnapdepth[depth], numrevs)
- )
- ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
- ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
- ui.writenoi18n(
- b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
- )
- for depth in sorted(numsnapdepth):
- ui.write(
- (b' lvl-%-3d : ' % depth)
- + fmt % pcfmt(snaptotal[depth], totalsize)
- )
- ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
-
- def fmtchunktype(chunktype):
- if chunktype == b'empty':
- return b' %s : ' % chunktype
- elif chunktype in pycompat.bytestr(string.ascii_letters):
- return b' 0x%s (%s) : ' % (hex(chunktype), chunktype)
- else:
- return b' 0x%s : ' % hex(chunktype)
-
- ui.write(b'\n')
- ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
- for chunktype in sorted(chunktypecounts):
- ui.write(fmtchunktype(chunktype))
- ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
- ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
- for chunktype in sorted(chunktypecounts):
- ui.write(fmtchunktype(chunktype))
- ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
-
- ui.write(b'\n')
- fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
- ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
- ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
- ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
- ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
-
- if format > 0:
- ui.write(b'\n')
- ui.writenoi18n(
- b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
- % tuple(datasize)
- )
- ui.writenoi18n(
- b'full revision size (min/max/avg) : %d / %d / %d\n'
- % tuple(fullsize)
- )
- ui.writenoi18n(
- b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
- % tuple(semisize)
- )
- for depth in sorted(snapsizedepth):
- if depth == 0:
- continue
- ui.writenoi18n(
- b' level-%-3d (min/max/avg) : %d / %d / %d\n'
- % ((depth,) + tuple(snapsizedepth[depth]))
- )
- ui.writenoi18n(
- b'delta size (min/max/avg) : %d / %d / %d\n'
- % tuple(deltasize)
- )
-
- if numdeltas > 0:
- ui.write(b'\n')
- fmt = pcfmtstr(numdeltas)
- fmt2 = pcfmtstr(numdeltas, 4)
- ui.writenoi18n(
- b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
- )
- if numprev > 0:
- ui.writenoi18n(
- b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
- )
- ui.writenoi18n(
- b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
- )
- ui.writenoi18n(
- b' other : ' + fmt2 % pcfmt(numoprev, numprev)
- )
- if gdelta:
- ui.writenoi18n(
- b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
- )
- ui.writenoi18n(
- b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
- )
- ui.writenoi18n(
- b'deltas against other : ' + fmt % pcfmt(numother, numdeltas)
- )
+ revlog_debug.debug_revlog(ui, r)
+ return 0
@command(
--- a/mercurial/revlogutils/debug.py Mon Nov 07 14:13:59 2022 -0500
+++ b/mercurial/revlogutils/debug.py Mon Nov 07 14:24:52 2022 -0500
@@ -6,8 +6,12 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
+import collections
+import string
+
from .. import (
node as nodemod,
+ util,
)
from . import (
@@ -267,3 +271,305 @@
clen,
)
)
+
+
+def debug_revlog(ui, revlog):  # write summary statistics about `revlog` to `ui`
+ """code for `hg debugrevlog`"""
+ r = revlog
+ format = r._format_version
+ v = r._format_flags
+ flags = []  # names of the format flags that are set, for display
+ gdelta = False
+ if v & constants.FLAG_INLINE_DATA:
+ flags.append(b'inline')
+ if v & constants.FLAG_GENERALDELTA:
+ gdelta = True
+ flags.append(b'generaldelta')
+ if not flags:
+ flags = [b'(none)']
+
+ ### tracks merge (second parent set) vs single parent
+ nummerges = 0
+
+ ### tracks the ways the "delta" are built
+ # empty revisions (length 0): total, without and with a delta parent
+ numempty = 0
+ numemptytext = 0
+ numemptydelta = 0
+ # full file content
+ numfull = 0
+ # intermediate snapshot against a prior snapshot
+ numsemi = 0
+ # snapshot count per depth (depth 0 counts the full revisions)
+ numsnapdepth = collections.defaultdict(lambda: 0)
+ # delta against previous revision
+ numprev = 0
+ # delta against first or second parent (not prev)
+ nump1 = 0
+ nump2 = 0
+ # delta against neither prev nor parents
+ numother = 0
+ # delta against prev that are also first or second parent
+ # (details of `numprev`)
+ nump1prev = 0
+ nump2prev = 0
+
+ # data about the delta chain of each rev (one entry appended per rev)
+ chainlengths = []
+ chainbases = []
+ chainspans = []
+
+ # size statistics as [min, max, total]; total is later turned into an average
+ datasize = [None, 0, 0]
+ fullsize = [None, 0, 0]
+ semisize = [None, 0, 0]
+ # snapshot size statistics per depth
+ snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
+ deltasize = [None, 0, 0]
+ chunktypecounts = {}  # chunk type -> number of revisions
+ chunktypesizes = {}  # chunk type -> summed length of those revisions
+
+ def addsize(size, l):  # fold `size` into the [min, max, total] list `l`
+ if l[0] is None or size < l[0]:
+ l[0] = size
+ if size > l[1]:
+ l[1] = size
+ l[2] += size
+
+ numrevs = len(r)
+ for rev in range(numrevs):
+ p1, p2 = r.parentrevs(rev)
+ delta = r.deltaparent(rev)
+ if format > 0:
+ addsize(r.rawsize(rev), datasize)
+ if p2 != nodemod.nullrev:
+ nummerges += 1
+ size = r.length(rev)
+ if delta == nodemod.nullrev:
+ chainlengths.append(0)  # no delta parent: this rev starts a chain
+ chainbases.append(r.start(rev))
+ chainspans.append(size)
+ if size == 0:
+ numempty += 1
+ numemptytext += 1
+ else:
+ numfull += 1
+ numsnapdepth[0] += 1
+ addsize(size, fullsize)
+ addsize(size, snapsizedepth[0])
+ else:
+ chainlengths.append(chainlengths[delta] + 1)  # one longer than the delta parent's chain
+ baseaddr = chainbases[delta]  # chain base is inherited from the delta parent
+ revaddr = r.start(rev)
+ chainbases.append(baseaddr)
+ chainspans.append((revaddr - baseaddr) + size)  # from chain base to the end of this rev
+ if size == 0:
+ numempty += 1
+ numemptydelta += 1
+ elif r.issnapshot(rev):
+ addsize(size, semisize)
+ numsemi += 1
+ depth = r.snapshotdepth(rev)
+ numsnapdepth[depth] += 1
+ addsize(size, snapsizedepth[depth])
+ else:
+ addsize(size, deltasize)
+ if delta == rev - 1:
+ numprev += 1
+ if delta == p1:
+ nump1prev += 1
+ elif delta == p2:
+ nump2prev += 1
+ elif delta == p1:
+ nump1 += 1
+ elif delta == p2:
+ nump2 += 1
+ elif delta != nodemod.nullrev:
+ numother += 1
+
+ # Obtain data on the raw chunks in the revlog.
+ if util.safehasattr(r, '_getsegmentforrevs'):
+ segment = r._getsegmentforrevs(rev, rev)[1]
+ else:
+ segment = r._revlog._getsegmentforrevs(rev, rev)[1]
+ if segment:
+ chunktype = bytes(segment[0:1])  # first byte of the segment identifies the chunk type
+ else:
+ chunktype = b'empty'
+
+ if chunktype not in chunktypecounts:
+ chunktypecounts[chunktype] = 0
+ chunktypesizes[chunktype] = 0
+
+ chunktypecounts[chunktype] += 1
+ chunktypesizes[chunktype] += size
+
+ # Adjust size min value for empty cases (a minimum still at None becomes 0)
+ for size in (datasize, fullsize, semisize, deltasize):
+ if size[0] is None:
+ size[0] = 0
+
+ numdeltas = numrevs - numfull - numempty - numsemi  # revs that are neither full, empty nor intermediate snapshot
+ numoprev = numprev - nump1prev - nump2prev  # deltas against prev where prev is neither parent
+ totalrawsize = datasize[2]
+ datasize[2] /= numrevs  # NOTE(review): divides by zero when the revlog has no revisions (numrevs == 0)
+ fulltotal = fullsize[2]
+ if numfull == 0:
+ fullsize[2] = 0
+ else:
+ fullsize[2] /= numfull
+ semitotal = semisize[2]
+ snaptotal = {}  # depth -> total snapshot size, saved before the slot becomes an average
+ if numsemi > 0:
+ semisize[2] /= numsemi
+ for depth in snapsizedepth:
+ snaptotal[depth] = snapsizedepth[depth][2]
+ snapsizedepth[depth][2] /= numsnapdepth[depth]
+
+ deltatotal = deltasize[2]
+ if numdeltas > 0:
+ deltasize[2] /= numdeltas
+ totalsize = fulltotal + semitotal + deltatotal
+ avgchainlen = sum(chainlengths) / numrevs
+ maxchainlen = max(chainlengths)
+ maxchainspan = max(chainspans)
+ compratio = 1  # kept when totalsize is 0
+ if totalsize:
+ compratio = totalrawsize / totalsize
+
+ basedfmtstr = b'%%%dd\n'  # template: filled with a width, yields a %<width>d format
+ basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'  # template: width and padding, yields "<int> (<percent>%)"
+
+ def dfmtstr(max):  # integer format as wide as str(max)
+ return basedfmtstr % len(str(max))
+
+ def pcfmtstr(max, padding=0):  # integer + percentage format as wide as str(max)
+ return basepcfmtstr % (len(str(max)), b' ' * padding)
+
+ def pcfmt(value, total):  # (value, percentage of total); 100.0 when total is 0
+ if total:
+ return (value, 100 * float(value) / total)
+ else:
+ return value, 100.0
+
+ ui.writenoi18n(b'format : %d\n' % format)
+ ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
+
+ ui.write(b'\n')
+ fmt = pcfmtstr(totalsize)
+ fmt2 = dfmtstr(totalsize)
+ ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
+ ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
+ ui.writenoi18n(
+ b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
+ )
+ ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
+ ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
+ ui.writenoi18n(
+ b' text : '
+ + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
+ )
+ ui.writenoi18n(
+ b' delta : '
+ + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
+ )
+ ui.writenoi18n(
+ b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
+ )
+ for depth in sorted(numsnapdepth):
+ ui.write(
+ (b' lvl-%-3d : ' % depth)
+ + fmt % pcfmt(numsnapdepth[depth], numrevs)
+ )
+ ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
+ ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
+ ui.writenoi18n(
+ b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
+ )
+ for depth in sorted(numsnapdepth):
+ ui.write(
+ (b' lvl-%-3d : ' % depth)
+ + fmt % pcfmt(snaptotal[depth], totalsize)
+ )
+ ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
+
+ letters = string.ascii_letters.encode('ascii')
+
+ def fmtchunktype(chunktype):  # row label: hex of the type byte, plus the byte itself if it is an ASCII letter
+ if chunktype == b'empty':
+ return b' %s : ' % chunktype
+ elif chunktype in letters:
+ return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
+ else:
+ return b' 0x%s : ' % nodemod.hex(chunktype)
+
+ ui.write(b'\n')
+ ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
+ for chunktype in sorted(chunktypecounts):
+ ui.write(fmtchunktype(chunktype))
+ ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
+ ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
+ for chunktype in sorted(chunktypecounts):
+ ui.write(fmtchunktype(chunktype))
+ ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
+
+ ui.write(b'\n')
+ fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))  # one width for all four values
+ ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
+ ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
+ ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
+ ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
+
+ if format > 0:  # same condition under which datasize was collected in the loop
+ ui.write(b'\n')
+ ui.writenoi18n(
+ b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
+ % tuple(datasize)
+ )
+ ui.writenoi18n(
+ b'full revision size (min/max/avg) : %d / %d / %d\n'
+ % tuple(fullsize)
+ )
+ ui.writenoi18n(
+ b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
+ % tuple(semisize)
+ )
+ for depth in sorted(snapsizedepth):
+ if depth == 0:  # depth 0 holds the full revisions, already reported via fullsize
+ continue
+ ui.writenoi18n(
+ b' level-%-3d (min/max/avg) : %d / %d / %d\n'
+ % ((depth,) + tuple(snapsizedepth[depth]))
+ )
+ ui.writenoi18n(
+ b'delta size (min/max/avg) : %d / %d / %d\n'
+ % tuple(deltasize)
+ )
+
+ if numdeltas > 0:
+ ui.write(b'\n')
+ fmt = pcfmtstr(numdeltas)
+ fmt2 = pcfmtstr(numdeltas, 4)
+ ui.writenoi18n(
+ b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
+ )
+ if numprev > 0:
+ ui.writenoi18n(
+ b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
+ )
+ ui.writenoi18n(
+ b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
+ )
+ ui.writenoi18n(
+ b' other : ' + fmt2 % pcfmt(numoprev, numprev)
+ )
+ if gdelta:  # parent/other breakdown is only printed when the generaldelta flag is set
+ ui.writenoi18n(
+ b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
+ )
+ ui.writenoi18n(
+ b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
+ )
+ ui.writenoi18n(
+ b'deltas against other : ' + fmt % pcfmt(numother, numdeltas)
+ )