# HG changeset patch
# User Pierre-Yves David
# Date 1537892626 -7200
# Node ID 683ceec8d37e4b51c59f089b12f0087993d2de2b
# Parent  1421ff5c5c96455caa47bfc669cf877729888740
pullbundle: add a command to generate cache hit statistics

This new command should help check that the stable ranges produce reusable
bundles.

diff -r 1421ff5c5c96 -r 683ceec8d37e hgext3rd/pullbundle.py
--- a/hgext3rd/pullbundle.py	Tue Sep 25 13:44:32 2018 +0200
+++ b/hgext3rd/pullbundle.py	Tue Sep 25 18:23:46 2018 +0200
@@ -72,16 +72,21 @@
 extensions next to it. As soon as stable range have been upstreamed, we won't
 need the dependency to the evolve extension anymore.
 """
+
+import collections
 import errno
+import random
 import os
 
 from mercurial import (
     changegroup,
     discovery,
+    error,
     exchange,
     narrowspec,
     node as nodemod,
     registrar,
+    scmutil,
     util,
 )
 
@@ -92,6 +97,9 @@
 # minimumhgversion = ''
 buglink = 'https://bz.mercurial-scm.org/'
 
+cmdtable = {}
+command = registrar.command(cmdtable)
+
 configtable = {}
 configitem = registrar.configitem(configtable)
 
@@ -437,3 +445,141 @@
         pversion = version
     partdata = (cachedata, nbchanges, pversion)
     return _makepartfromstream(newpart, repo, *partdata)
+
+@command('^debugpullbundlecacheoverlap',
+         [('', 'count', 100, _('of "client" pulling')),
+         ],
+         _('hg debugpullbundlecacheoverlap [--count 100] REVSET'))
+def debugpullbundlecacheoverlap(ui, repo, *revs, **opts):
+    '''Display statistics on bundle cache hits
+
+    This command simulates pulls from multiple "clients", each using a random
+    subset of the revisions defined by REVSET, and displays statistics about
+    the overlap in the bundles necessary to serve them.
+    '''
+    actionrevs = scmutil.revrange(repo, revs)
+    # Test the resolved revisions rather than the raw arguments: a REVSET
+    # matching nothing must abort here instead of failing while sampling.
+    if not actionrevs:
+        raise error.Abort('No revision selected')
+    count = opts['count']
+    # The summary below needs at least one simulated pull.
+    if count < 1:
+        raise error.Abort('--count must be a positive number')
+
+    # rangeid -> number of simulated pulls that needed this range so far
+    bundlehits = collections.defaultdict(int)
+    # one (pull size, changesets in ranges, cached changesets, ranges,
+    # cached ranges) tuple per simulated pull
+    pullstats = []
+
+    rlen = lambda rangeid: repo.stablerange.rangelength(repo, rangeid)
+
+    repo.ui.write("gathering %d sample pulls within %d revisions\n"
+                  % (count, len(actionrevs)))
+    for i in xrange(count):
+        repo.ui.progress('gathering data', i, total=count)
+        outgoing = takeonesample(repo, actionrevs)
+        ranges = sliceoutgoing(repo, outgoing)
+        hitranges = 0
+        hitchanges = 0
+        totalchanges = 0
+        for rangeid, __ in ranges:
+            length = rlen(rangeid)
+            totalchanges += length
+            # a range is a cache hit when an earlier pull already used it
+            if bundlehits[rangeid]:
+                hitranges += 1
+                hitchanges += length
+            bundlehits[rangeid] += 1
+        stats = (len(outgoing.missing),
+                 totalchanges,
+                 hitchanges,
+                 len(ranges),
+                 hitranges,
+                 )
+        pullstats.append(stats)
+    repo.ui.progress('gathering data', None)
+
+    sizes = []
+    changesmissing = []
+    totalchanges = 0
+    totalcached = 0
+    changesratio = []
+    rangesratio = []
+    bundlecount = []
+    for entry in pullstats:
+        sizes.append(entry[0])
+        changesmissing.append(entry[1] - entry[2])
+        changesratio.append(entry[2] / float(entry[1]))
+        rangesratio.append(entry[4] / float(entry[3]))
+        bundlecount.append(entry[3])
+        totalchanges += entry[1]
+        totalcached += entry[2]
+
+    sizesdist = distribution(sizes)
+    repo.ui.write(fmtdist('pull size', sizesdist))
+    changesmissingdist = distribution(changesmissing)
+    repo.ui.write(fmtdist('non-cached changesets', changesmissingdist))
+    changesratiodist = distribution(changesratio)
+    repo.ui.write(fmtdist('ratio of cached changesets', changesratiodist))
+    bundlecountdist = distribution(bundlecount)
+    repo.ui.write(fmtdist('bundle count', bundlecountdist))
+    rangesratiodist = distribution(rangesratio)
+    repo.ui.write(fmtdist('ratio of cached bundles', rangesratiodist))
+    repo.ui.write('changesets served:\n')
+    repo.ui.write('  total:      %7d\n' % totalchanges)
+    repo.ui.write('  from cache: %7d (%2d%%)\n'
+                  % (totalcached, (totalcached * 100 // totalchanges)))
+    repo.ui.write('  bundle:     %7d\n' % sum(bundlecount))
+
+def takeonesample(repo, revs):
+    """build the outgoing object of one simulated pull
+
+    A random sample of `revs` is taken (one revision per thousand, at least
+    four when that many are available) and completed with the revisions that
+    are both descendants and ancestors of sampled ones (`%ld::%ld`). The
+    matching nodes are handed to outgoingfromnodes().
+    """
+    node = repo.changelog.node
+    # random.sample() raises ValueError when asked for more items than the
+    # population holds, so never request more than len(revs).
+    samplesize = min(len(revs), max(4, len(revs) // 1000))
+    pulled = random.sample(revs, samplesize)
+    pulled = repo.revs('%ld::%ld', pulled, pulled)
+    nodes = [node(r) for r in pulled]
+    return outgoingfromnodes(repo, nodes)
+
+def distribution(data):
+    """return the min, max and a few percentiles of a non-empty sequence
+
+    Percentiles are picked by index in the sorted values, without any
+    interpolation. `data` itself is left untouched.
+    """
+    # sort a copy so the caller's list is not reordered as a side effect
+    data = sorted(data)
+    length = len(data)
+    return {
+        'min': data[0],
+        '10%': data[length // 10],
+        '25%': data[length // 4],
+        '50%': data[length // 2],
+        '75%': data[(length // 4) * 3],
+        '90%': data[(length // 10) * 9],
+        'max': data[-1],
+    }
+
+# template used by fmtdist(); fields match the keys built by distribution()
+STATSFORMAT = """{name}:
+  min: {min}
+  10%: {10%}
+  25%: {25%}
+  50%: {50%}
+  75%: {75%}
+  90%: {90%}
+  max: {max}
+"""
+
+def fmtdist(name, data):
+    """return `data` (as built by distribution()) as a block titled `name`"""
+    return STATSFORMAT.format(name=name, **data)