# HG changeset patch # User Pierre-Yves David # Date 1667849092 18000 # Node ID bd3b6f363fb9daef356e384aec7efada40e6b47b # Parent 7c0a383849a8a3d3343692f892197aad6179c7bb debug-revlog: move the code in revlogutils module We have a module dedicated to debug code, let us use it. diff -r 7c0a383849a8 -r bd3b6f363fb9 mercurial/debugcommands.py --- a/mercurial/debugcommands.py Mon Nov 07 14:13:59 2022 -0500 +++ b/mercurial/debugcommands.py Mon Nov 07 14:24:52 2022 -0500 @@ -21,7 +21,6 @@ import socket import ssl import stat -import string import subprocess import sys import time @@ -3241,303 +3240,9 @@ if opts.get(b"dump"): revlog_debug.dump(ui, r) - return 0 - - format = r._format_version - v = r._format_flags - flags = [] - gdelta = False - if v & revlog.FLAG_INLINE_DATA: - flags.append(b'inline') - if v & revlog.FLAG_GENERALDELTA: - gdelta = True - flags.append(b'generaldelta') - if not flags: - flags = [b'(none)'] - - ### tracks merge vs single parent - nummerges = 0 - - ### tracks ways the "delta" are build - # nodelta - numempty = 0 - numemptytext = 0 - numemptydelta = 0 - # full file content - numfull = 0 - # intermediate snapshot against a prior snapshot - numsemi = 0 - # snapshot count per depth - numsnapdepth = collections.defaultdict(lambda: 0) - # delta against previous revision - numprev = 0 - # delta against first or second parent (not prev) - nump1 = 0 - nump2 = 0 - # delta against neither prev nor parents - numother = 0 - # delta against prev that are also first or second parent - # (details of `numprev`) - nump1prev = 0 - nump2prev = 0 - - # data about delta chain of each revs - chainlengths = [] - chainbases = [] - chainspans = [] - - # data about each revision - datasize = [None, 0, 0] - fullsize = [None, 0, 0] - semisize = [None, 0, 0] - # snapshot count per depth - snapsizedepth = collections.defaultdict(lambda: [None, 0, 0]) - deltasize = [None, 0, 0] - chunktypecounts = {} - chunktypesizes = {} - - def addsize(size, l): - if l[0] is None or size < l[0]: - l[0] = size - if size > l[1]: - l[1] = size - l[2] += size - - numrevs = len(r) - for rev in range(numrevs): - p1, p2 = r.parentrevs(rev) - delta = r.deltaparent(rev) - if format > 0: - addsize(r.rawsize(rev), datasize) - if p2 != nullrev: - nummerges += 1 - size = r.length(rev) - if delta == nullrev: - chainlengths.append(0) - chainbases.append(r.start(rev)) - chainspans.append(size) - if size == 0: - numempty += 1 - numemptytext += 1 - else: - numfull += 1 - numsnapdepth[0] += 1 - addsize(size, fullsize) - addsize(size, snapsizedepth[0]) - else: - chainlengths.append(chainlengths[delta] + 1) - baseaddr = chainbases[delta] - revaddr = r.start(rev) - chainbases.append(baseaddr) - chainspans.append((revaddr - baseaddr) + size) - if size == 0: - numempty += 1 - numemptydelta += 1 - elif r.issnapshot(rev): - addsize(size, semisize) - numsemi += 1 - depth = r.snapshotdepth(rev) - numsnapdepth[depth] += 1 - addsize(size, snapsizedepth[depth]) - else: - addsize(size, deltasize) - if delta == rev - 1: - numprev += 1 - if delta == p1: - nump1prev += 1 - elif delta == p2: - nump2prev += 1 - elif delta == p1: - nump1 += 1 - elif delta == p2: - nump2 += 1 - elif delta != nullrev: - numother += 1 - - # Obtain data on the raw chunks in the revlog. - if util.safehasattr(r, b'_getsegmentforrevs'): - segment = r._getsegmentforrevs(rev, rev)[1] - else: - segment = r._revlog._getsegmentforrevs(rev, rev)[1] - if segment: - chunktype = bytes(segment[0:1]) - else: - chunktype = b'empty' - - if chunktype not in chunktypecounts: - chunktypecounts[chunktype] = 0 - chunktypesizes[chunktype] = 0 - - chunktypecounts[chunktype] += 1 - chunktypesizes[chunktype] += size - - # Adjust size min value for empty cases - for size in (datasize, fullsize, semisize, deltasize): - if size[0] is None: - size[0] = 0 - - numdeltas = numrevs - numfull - numempty - numsemi - numoprev = numprev - nump1prev - nump2prev - totalrawsize = datasize[2] - datasize[2] /= numrevs - fulltotal = fullsize[2] - if numfull == 0: - fullsize[2] = 0 else: - fullsize[2] /= numfull - semitotal = semisize[2] - snaptotal = {} - if numsemi > 0: - semisize[2] /= numsemi - for depth in snapsizedepth: - snaptotal[depth] = snapsizedepth[depth][2] - snapsizedepth[depth][2] /= numsnapdepth[depth] - - deltatotal = deltasize[2] - if numdeltas > 0: - deltasize[2] /= numdeltas - totalsize = fulltotal + semitotal + deltatotal - avgchainlen = sum(chainlengths) / numrevs - maxchainlen = max(chainlengths) - maxchainspan = max(chainspans) - compratio = 1 - if totalsize: - compratio = totalrawsize / totalsize - - basedfmtstr = b'%%%dd\n' - basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n' - - def dfmtstr(max): - return basedfmtstr % len(str(max)) - - def pcfmtstr(max, padding=0): - return basepcfmtstr % (len(str(max)), b' ' * padding) - - def pcfmt(value, total): - if total: - return (value, 100 * float(value) / total) - else: - return value, 100.0 - - ui.writenoi18n(b'format : %d\n' % format) - ui.writenoi18n(b'flags : %s\n' % b', '.join(flags)) - - ui.write(b'\n') - fmt = pcfmtstr(totalsize) - fmt2 = dfmtstr(totalsize) - ui.writenoi18n(b'revisions : ' + fmt2 % numrevs) - ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs)) - ui.writenoi18n( - b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs) - ) - ui.writenoi18n(b'revisions : ' + fmt2 % numrevs) - ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs)) - ui.writenoi18n( - b' text : ' - + fmt % pcfmt(numemptytext, numemptytext + numemptydelta) - ) - ui.writenoi18n( - b' delta : ' - + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta) - ) - ui.writenoi18n( - b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs) - ) - for depth in sorted(numsnapdepth): - ui.write( - (b' lvl-%-3d : ' % depth) - + fmt % pcfmt(numsnapdepth[depth], numrevs) - ) - ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs)) - ui.writenoi18n(b'revision size : ' + fmt2 % totalsize) - ui.writenoi18n( - b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize) - ) - for depth in sorted(numsnapdepth): - ui.write( - (b' lvl-%-3d : ' % depth) - + fmt % pcfmt(snaptotal[depth], totalsize) - ) - ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize)) - - def fmtchunktype(chunktype): - if chunktype == b'empty': - return b' %s : ' % chunktype - elif chunktype in pycompat.bytestr(string.ascii_letters): - return b' 0x%s (%s) : ' % (hex(chunktype), chunktype) - else: - return b' 0x%s : ' % hex(chunktype) - - ui.write(b'\n') - ui.writenoi18n(b'chunks : ' + fmt2 % numrevs) - for chunktype in sorted(chunktypecounts): - ui.write(fmtchunktype(chunktype)) - ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs)) - ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize) - for chunktype in sorted(chunktypecounts): - ui.write(fmtchunktype(chunktype)) - ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize)) - - ui.write(b'\n') - fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio)) - ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen) - ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen) - ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan) - ui.writenoi18n(b'compression ratio : ' + fmt % compratio) - - if format > 0: - ui.write(b'\n') - ui.writenoi18n( - b'uncompressed data size (min/max/avg) : %d / %d / %d\n' - % tuple(datasize) - ) - ui.writenoi18n( - b'full revision size (min/max/avg) : %d / %d / %d\n' - % tuple(fullsize) - ) - ui.writenoi18n( - b'inter-snapshot size (min/max/avg) : %d / %d / %d\n' - % tuple(semisize) - ) - for depth in sorted(snapsizedepth): - if depth == 0: - continue - ui.writenoi18n( - b' level-%-3d (min/max/avg) : %d / %d / %d\n' - % ((depth,) + tuple(snapsizedepth[depth])) - ) - ui.writenoi18n( - b'delta size (min/max/avg) : %d / %d / %d\n' - % tuple(deltasize) - ) - - if numdeltas > 0: - ui.write(b'\n') - fmt = pcfmtstr(numdeltas) - fmt2 = pcfmtstr(numdeltas, 4) - ui.writenoi18n( - b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas) - ) - if numprev > 0: - ui.writenoi18n( - b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev) - ) - ui.writenoi18n( - b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev) - ) - ui.writenoi18n( - b' other : ' + fmt2 % pcfmt(numoprev, numprev) - ) - if gdelta: - ui.writenoi18n( - b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas) - ) - ui.writenoi18n( - b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas) - ) - ui.writenoi18n( - b'deltas against other : ' + fmt % pcfmt(numother, numdeltas) - ) + revlog_debug.debug_revlog(ui, r) + return 0 @command( diff -r 7c0a383849a8 -r bd3b6f363fb9 mercurial/revlogutils/debug.py --- a/mercurial/revlogutils/debug.py Mon Nov 07 14:13:59 2022 -0500 +++ b/mercurial/revlogutils/debug.py Mon Nov 07 14:24:52 2022 -0500 @@ -6,8 +6,12 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. +import collections +import string + from .. import ( node as nodemod, + util, ) from . import ( @@ -267,3 +271,305 @@ clen, ) ) + + +def debug_revlog(ui, revlog): + """code for `hg debugrevlog`""" + r = revlog + format = r._format_version + v = r._format_flags + flags = [] + gdelta = False + if v & constants.FLAG_INLINE_DATA: + flags.append(b'inline') + if v & constants.FLAG_GENERALDELTA: + gdelta = True + flags.append(b'generaldelta') + if not flags: + flags = [b'(none)'] + + ### tracks merge vs single parent + nummerges = 0 + + ### tracks ways the "delta" are build + # nodelta + numempty = 0 + numemptytext = 0 + numemptydelta = 0 + # full file content + numfull = 0 + # intermediate snapshot against a prior snapshot + numsemi = 0 + # snapshot count per depth + numsnapdepth = collections.defaultdict(lambda: 0) + # delta against previous revision + numprev = 0 + # delta against first or second parent (not prev) + nump1 = 0 + nump2 = 0 + # delta against neither prev nor parents + numother = 0 + # delta against prev that are also first or second parent + # (details of `numprev`) + nump1prev = 0 + nump2prev = 0 + + # data about delta chain of each revs + chainlengths = [] + chainbases = [] + chainspans = [] + + # data about each revision + datasize = [None, 0, 0] + fullsize = [None, 0, 0] + semisize = [None, 0, 0] + # snapshot count per depth + snapsizedepth = collections.defaultdict(lambda: [None, 0, 0]) + deltasize = [None, 0, 0] + chunktypecounts = {} + chunktypesizes = {} + + def addsize(size, l): + if l[0] is None or size < l[0]: + l[0] = size + if size > l[1]: + l[1] = size + l[2] += size + + numrevs = len(r) + for rev in range(numrevs): + p1, p2 = r.parentrevs(rev) + delta = r.deltaparent(rev) + if format > 0: + addsize(r.rawsize(rev), datasize) + if p2 != nodemod.nullrev: + nummerges += 1 + size = r.length(rev) + if delta == nodemod.nullrev: + chainlengths.append(0) + chainbases.append(r.start(rev)) + chainspans.append(size) + if size == 0: + numempty += 1 + numemptytext += 1 + else: + numfull += 1 + numsnapdepth[0] += 1 + addsize(size, fullsize) + addsize(size, snapsizedepth[0]) + else: + chainlengths.append(chainlengths[delta] + 1) + baseaddr = chainbases[delta] + revaddr = r.start(rev) + chainbases.append(baseaddr) + chainspans.append((revaddr - baseaddr) + size) + if size == 0: + numempty += 1 + numemptydelta += 1 + elif r.issnapshot(rev): + addsize(size, semisize) + numsemi += 1 + depth = r.snapshotdepth(rev) + numsnapdepth[depth] += 1 + addsize(size, snapsizedepth[depth]) + else: + addsize(size, deltasize) + if delta == rev - 1: + numprev += 1 + if delta == p1: + nump1prev += 1 + elif delta == p2: + nump2prev += 1 + elif delta == p1: + nump1 += 1 + elif delta == p2: + nump2 += 1 + elif delta != nodemod.nullrev: + numother += 1 + + # Obtain data on the raw chunks in the revlog. + if util.safehasattr(r, '_getsegmentforrevs'): + segment = r._getsegmentforrevs(rev, rev)[1] + else: + segment = r._revlog._getsegmentforrevs(rev, rev)[1] + if segment: + chunktype = bytes(segment[0:1]) + else: + chunktype = b'empty' + + if chunktype not in chunktypecounts: + chunktypecounts[chunktype] = 0 + chunktypesizes[chunktype] = 0 + + chunktypecounts[chunktype] += 1 + chunktypesizes[chunktype] += size + + # Adjust size min value for empty cases + for size in (datasize, fullsize, semisize, deltasize): + if size[0] is None: + size[0] = 0 + + numdeltas = numrevs - numfull - numempty - numsemi + numoprev = numprev - nump1prev - nump2prev + totalrawsize = datasize[2] + datasize[2] /= numrevs + fulltotal = fullsize[2] + if numfull == 0: + fullsize[2] = 0 + else: + fullsize[2] /= numfull + semitotal = semisize[2] + snaptotal = {} + if numsemi > 0: + semisize[2] /= numsemi + for depth in snapsizedepth: + snaptotal[depth] = snapsizedepth[depth][2] + snapsizedepth[depth][2] /= numsnapdepth[depth] + + deltatotal = deltasize[2] + if numdeltas > 0: + deltasize[2] /= numdeltas + totalsize = fulltotal + semitotal + deltatotal + avgchainlen = sum(chainlengths) / numrevs + maxchainlen = max(chainlengths) + maxchainspan = max(chainspans) + compratio = 1 + if totalsize: + compratio = totalrawsize / totalsize + + basedfmtstr = b'%%%dd\n' + basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n' + + def dfmtstr(max): + return basedfmtstr % len(str(max)) + + def pcfmtstr(max, padding=0): + return basepcfmtstr % (len(str(max)), b' ' * padding) + + def pcfmt(value, total): + if total: + return (value, 100 * float(value) / total) + else: + return value, 100.0 + + ui.writenoi18n(b'format : %d\n' % format) + ui.writenoi18n(b'flags : %s\n' % b', '.join(flags)) + + ui.write(b'\n') + fmt = pcfmtstr(totalsize) + fmt2 = dfmtstr(totalsize) + ui.writenoi18n(b'revisions : ' + fmt2 % numrevs) + ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs)) + ui.writenoi18n( + b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs) + ) + ui.writenoi18n(b'revisions : ' + fmt2 % numrevs) + ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs)) + ui.writenoi18n( + b' text : ' + + fmt % pcfmt(numemptytext, numemptytext + numemptydelta) + ) + ui.writenoi18n( + b' delta : ' + + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta) + ) + ui.writenoi18n( + b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs) + ) + for depth in sorted(numsnapdepth): + ui.write( + (b' lvl-%-3d : ' % depth) + + fmt % pcfmt(numsnapdepth[depth], numrevs) + ) + ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs)) + ui.writenoi18n(b'revision size : ' + fmt2 % totalsize) + ui.writenoi18n( + b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize) + ) + for depth in sorted(numsnapdepth): + ui.write( + (b' lvl-%-3d : ' % depth) + + fmt % pcfmt(snaptotal[depth], totalsize) + ) + ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize)) + + letters = string.ascii_letters.encode('ascii') + + def fmtchunktype(chunktype): + if chunktype == b'empty': + return b' %s : ' % chunktype + elif chunktype in letters: + return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype) + else: + return b' 0x%s : ' % nodemod.hex(chunktype) + + ui.write(b'\n') + ui.writenoi18n(b'chunks : ' + fmt2 % numrevs) + for chunktype in sorted(chunktypecounts): + ui.write(fmtchunktype(chunktype)) + ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs)) + ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize) + for chunktype in sorted(chunktypecounts): + ui.write(fmtchunktype(chunktype)) + ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize)) + + ui.write(b'\n') + fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio)) + ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen) + ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen) + ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan) + ui.writenoi18n(b'compression ratio : ' + fmt % compratio) + + if format > 0: + ui.write(b'\n') + ui.writenoi18n( + b'uncompressed data size (min/max/avg) : %d / %d / %d\n' + % tuple(datasize) + ) + ui.writenoi18n( + b'full revision size (min/max/avg) : %d / %d / %d\n' + % tuple(fullsize) + ) + ui.writenoi18n( + b'inter-snapshot size (min/max/avg) : %d / %d / %d\n' + % tuple(semisize) + ) + for depth in sorted(snapsizedepth): + if depth == 0: + continue + ui.writenoi18n( + b' level-%-3d (min/max/avg) : %d / %d / %d\n' + % ((depth,) + tuple(snapsizedepth[depth])) + ) + ui.writenoi18n( + b'delta size (min/max/avg) : %d / %d / %d\n' + % tuple(deltasize) + ) + + if numdeltas > 0: + ui.write(b'\n') + fmt = pcfmtstr(numdeltas) + fmt2 = pcfmtstr(numdeltas, 4) + ui.writenoi18n( + b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas) + ) + if numprev > 0: + ui.writenoi18n( + b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev) + ) + ui.writenoi18n( + b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev) + ) + ui.writenoi18n( + b' other : ' + fmt2 % pcfmt(numoprev, numprev) + ) + if gdelta: + ui.writenoi18n( + b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas) + ) + ui.writenoi18n( + b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas) + ) + ui.writenoi18n( + b'deltas against other : ' + fmt % pcfmt(numother, numdeltas) + )