Mercurial > hg
changeset 51070:d7f975e49f20
delta-chain: move the debugdeltachain command into revlogutils
There is a dedicated `mercurial.revlogutils.debug` module where this code fits
well.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 18 Sep 2023 23:26:00 +0200 |
parents | a63e1f7987a7 |
children | 793a058f64bd |
files | mercurial/debugcommands.py mercurial/revlogutils/debug.py |
diffstat | 2 files changed, 183 insertions(+), 190 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/debugcommands.py Thu Oct 12 09:04:12 2023 +0200 +++ b/mercurial/debugcommands.py Mon Sep 18 23:26:00 2023 +0200 @@ -105,9 +105,7 @@ ) from .revlogutils import ( - constants as revlog_constants, debug as revlog_debug, - deltas as deltautil, nodemap, rewrite, sidedata, @@ -799,200 +797,23 @@ The sparse read can be enabled with experimental.sparse-read = True """ - r = cmdutil.openrevlog( + revlog = cmdutil.openrevlog( repo, b'debugdeltachain', file_, pycompat.byteskwargs(opts) ) - index = r.index - start = r.start - length = r.length - generaldelta = r.delta_config.general_delta - withsparseread = r.data_config.with_sparse_read - - # security to avoid crash on corrupted revlogs - total_revs = len(index) - - chain_size_cache = {} - - def revinfo(rev): - e = index[rev] - compsize = e[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH] - uncompsize = e[revlog_constants.ENTRY_DATA_UNCOMPRESSED_LENGTH] - - base = e[revlog_constants.ENTRY_DELTA_BASE] - p1 = e[revlog_constants.ENTRY_PARENT_1] - p2 = e[revlog_constants.ENTRY_PARENT_2] - - # If the parents of a revision has an empty delta, we never try to delta - # against that parent, but directly against the delta base of that - # parent (recursively). It avoids adding a useless entry in the chain. - # - # However we need to detect that as a special case for delta-type, that - # is not simply "other". 
- p1_base = p1 - if p1 != nullrev and p1 < total_revs: - e1 = index[p1] - while e1[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0: - new_base = e1[revlog_constants.ENTRY_DELTA_BASE] - if ( - new_base == p1_base - or new_base == nullrev - or new_base >= total_revs - ): - break - p1_base = new_base - e1 = index[p1_base] - p2_base = p2 - if p2 != nullrev and p2 < total_revs: - e2 = index[p2] - while e2[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0: - new_base = e2[revlog_constants.ENTRY_DELTA_BASE] - if ( - new_base == p2_base - or new_base == nullrev - or new_base >= total_revs - ): - break - p2_base = new_base - e2 = index[p2_base] - - if generaldelta: - if base == p1: - deltatype = b'p1' - elif base == p2: - deltatype = b'p2' - elif base == rev: - deltatype = b'base' - elif base == p1_base: - deltatype = b'skip1' - elif base == p2_base: - deltatype = b'skip2' - elif r.issnapshot(rev): - deltatype = b'snap' - elif base == rev - 1: - deltatype = b'prev' - else: - deltatype = b'other' - else: - if base == rev: - deltatype = b'base' - else: - deltatype = b'prev' - - chain = r._deltachain(rev)[0] - chain_size = 0 - for iter_rev in reversed(chain): - cached = chain_size_cache.get(iter_rev) - if cached is not None: - chain_size += cached - break - e = index[iter_rev] - chain_size += e[revlog_constants.ENTRY_DATA_COMPRESSED_LENGTH] - chain_size_cache[rev] = chain_size - - return p1, p2, compsize, uncompsize, deltatype, chain, chain_size - fm = ui.formatter(b'debugdeltachain', pycompat.byteskwargs(opts)) - fm.plain( - b' rev p1 p2 chain# chainlen prev delta ' - b'size rawsize chainsize ratio lindist extradist ' - b'extraratio' - ) - if withsparseread: - fm.plain(b' readsize largestblk rddensity srchunks') - fm.plain(b'\n') - - chainbases = {} - for rev in r: - p1, p2, comp, uncomp, deltatype, chain, chainsize = revinfo(rev) - chainbase = chain[0] - chainid = chainbases.setdefault(chainbase, len(chainbases) + 1) - basestart = start(chainbase) - revstart = 
start(rev) - lineardist = revstart + comp - basestart - extradist = lineardist - chainsize - try: - prevrev = chain[-2] - except IndexError: - prevrev = -1 - - if uncomp != 0: - chainratio = float(chainsize) / float(uncomp) - else: - chainratio = chainsize - - if chainsize != 0: - extraratio = float(extradist) / float(chainsize) - else: - extraratio = extradist - + lines = revlog_debug.debug_delta_chain(revlog) + # first entry is the header + header = next(lines) + fm.plain(header) + for entry in lines: + label = b' '.join(e[0] for e in entry) + format = b' '.join(e[1] for e in entry) + values = [e[3] for e in entry] + data = dict((e[2], e[3]) for e in entry) fm.startitem() - fm.write( - b'rev p1 p2 chainid chainlen prevrev deltatype compsize ' - b'uncompsize chainsize chainratio lindist extradist ' - b'extraratio', - b'%7d %7d %7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f', - rev, - p1, - p2, - chainid, - len(chain), - prevrev, - deltatype, - comp, - uncomp, - chainsize, - chainratio, - lineardist, - extradist, - extraratio, - rev=rev, - chainid=chainid, - chainlen=len(chain), - prevrev=prevrev, - deltatype=deltatype, - compsize=comp, - uncompsize=uncomp, - chainsize=chainsize, - chainratio=chainratio, - lindist=lineardist, - extradist=extradist, - extraratio=extraratio, - ) - if withsparseread: - readsize = 0 - largestblock = 0 - srchunks = 0 - - for revschunk in deltautil.slicechunk(r, chain): - srchunks += 1 - blkend = start(revschunk[-1]) + length(revschunk[-1]) - blksize = blkend - start(revschunk[0]) - - readsize += blksize - if largestblock < blksize: - largestblock = blksize - - if readsize: - readdensity = float(chainsize) / float(readsize) - else: - readdensity = 1 - - fm.write( - b'readsize largestblock readdensity srchunks', - b' %10d %10d %9.5f %8d', - readsize, - largestblock, - readdensity, - srchunks, - readsize=readsize, - largestblock=largestblock, - readdensity=readdensity, - srchunks=srchunks, - ) - + fm.write(label, format, *values, 
**data) fm.plain(b'\n') - fm.end()
--- a/mercurial/revlogutils/debug.py Thu Oct 12 09:04:12 2023 +0200 +++ b/mercurial/revlogutils/debug.py Mon Sep 18 23:26:00 2023 +0200 @@ -710,3 +710,175 @@ fm.write(b'revlog.target', b' %s', revlog_target) fm.plain(b'\n') + + +def debug_delta_chain(revlog): + r = revlog + index = r.index + start = r.start + length = r.length + generaldelta = r.delta_config.general_delta + withsparseread = r.data_config.with_sparse_read + + # security to avoid crash on corrupted revlogs + total_revs = len(index) + + chain_size_cache = {} + + def revinfo(rev): + e = index[rev] + compsize = e[constants.ENTRY_DATA_COMPRESSED_LENGTH] + uncompsize = e[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH] + + base = e[constants.ENTRY_DELTA_BASE] + p1 = e[constants.ENTRY_PARENT_1] + p2 = e[constants.ENTRY_PARENT_2] + + # If the parents of a revision has an empty delta, we never try to + # delta against that parent, but directly against the delta base of + # that parent (recursively). It avoids adding a useless entry in the + # chain. + # + # However we need to detect that as a special case for delta-type, that + # is not simply "other". 
+ p1_base = p1 + if p1 != nodemod.nullrev and p1 < total_revs: + e1 = index[p1] + while e1[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0: + new_base = e1[constants.ENTRY_DELTA_BASE] + if ( + new_base == p1_base + or new_base == nodemod.nullrev + or new_base >= total_revs + ): + break + p1_base = new_base + e1 = index[p1_base] + p2_base = p2 + if p2 != nodemod.nullrev and p2 < total_revs: + e2 = index[p2] + while e2[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0: + new_base = e2[constants.ENTRY_DELTA_BASE] + if ( + new_base == p2_base + or new_base == nodemod.nullrev + or new_base >= total_revs + ): + break + p2_base = new_base + e2 = index[p2_base] + + if generaldelta: + if base == p1: + deltatype = b'p1' + elif base == p2: + deltatype = b'p2' + elif base == rev: + deltatype = b'base' + elif base == p1_base: + deltatype = b'skip1' + elif base == p2_base: + deltatype = b'skip2' + elif r.issnapshot(rev): + deltatype = b'snap' + elif base == rev - 1: + deltatype = b'prev' + else: + deltatype = b'other' + else: + if base == rev: + deltatype = b'base' + else: + deltatype = b'prev' + + chain = r._deltachain(rev)[0] + chain_size = 0 + for iter_rev in reversed(chain): + cached = chain_size_cache.get(iter_rev) + if cached is not None: + chain_size += cached + break + e = index[iter_rev] + chain_size += e[constants.ENTRY_DATA_COMPRESSED_LENGTH] + chain_size_cache[rev] = chain_size + + return p1, p2, compsize, uncompsize, deltatype, chain, chain_size + + header = ( + b' rev p1 p2 chain# chainlen prev delta ' + b'size rawsize chainsize ratio lindist extradist ' + b'extraratio' + ) + if withsparseread: + header += b' readsize largestblk rddensity srchunks' + header += b'\n' + yield header + + chainbases = {} + for rev in r: + p1, p2, comp, uncomp, deltatype, chain, chainsize = revinfo(rev) + chainbase = chain[0] + chainid = chainbases.setdefault(chainbase, len(chainbases) + 1) + basestart = start(chainbase) + revstart = start(rev) + lineardist = revstart + comp - basestart + 
extradist = lineardist - chainsize + try: + prevrev = chain[-2] + except IndexError: + prevrev = -1 + + if uncomp != 0: + chainratio = float(chainsize) / float(uncomp) + else: + chainratio = chainsize + + if chainsize != 0: + extraratio = float(extradist) / float(chainsize) + else: + extraratio = extradist + + # label, display-format, data-key, value + entry = [ + (b'rev', b'%7d', 'rev', rev), + (b'p1', b'%7d', 'p1', p1), + (b'p2', b'%7d', 'p2', p2), + (b'chainid', b'%7d', 'chainid', chainid), + (b'chainlen', b'%8d', 'chainlen', len(chain)), + (b'prevrev', b'%8d', 'prevrev', prevrev), + (b'deltatype', b'%7s', 'deltatype', deltatype), + (b'compsize', b'%10d', 'compsize', comp), + (b'uncompsize', b'%10d', 'uncompsize', uncomp), + (b'chainsize', b'%10d', 'chainsize', chainsize), + (b'chainratio', b'%9.5f', 'chainratio', chainratio), + (b'lindist', b'%9d', 'lindist', lineardist), + (b'extradist', b'%9d', 'extradist', extradist), + (b'extraratio', b'%10.5f', 'extraratio', extraratio), + ] + if withsparseread: + readsize = 0 + largestblock = 0 + srchunks = 0 + + for revschunk in deltautil.slicechunk(r, chain): + srchunks += 1 + blkend = start(revschunk[-1]) + length(revschunk[-1]) + blksize = blkend - start(revschunk[0]) + + readsize += blksize + if largestblock < blksize: + largestblock = blksize + + if readsize: + readdensity = float(chainsize) / float(readsize) + else: + readdensity = 1 + entry.extend( + [ + (b'readsize', b'%10d', 'readsize', readsize), + (b'largestblock', b'%10d', 'largestblock', largestblock), + (b'readdensity', b'%9.5f', 'readdensity', readdensity), + (b'srchunks', b'%8d', 'srchunks', srchunks), + ] + ) + yield entry