#!/usr/bin/env python3
# Search for interesting discovery instances
#
# search-discovery-case REPO [REPO]...
#
# This uses the subsetmaker extension (next to this script) to generate a
# stream of random discovery instances. When an interesting case is
# discovered, information about it is printed on stdout.
#
# Provenance (Mercurial changeset this script comes from):
#
#   # HG changeset patch
#   # User Pierre-Yves David
#   # Date 1615827702 -3600 (Mon Mar 15 18:01:42 2021 +0100)
#   # Node ID 6b26e64325544f56d2089a7613e5be4ffb7318fc
#   # Parent b6ac612445e05909f0ca5542e08040b57e126c2a
#   perf-helpers: add a search-discovery-case script
#
#   This is a small script I built to look for interesting discovery cases.
#   It is fairly basic but could be useful in various situations, so let's
#   put it in the main repository.
#
#   Differential Revision: https://phab.mercurial-scm.org/D10225
#   Path: contrib/perf-utils/search-discovery-case
from __future__ import print_function

import json
import os
import queue
import random
import signal
import subprocess
import sys
import threading

this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, '..', '..')
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads; each one runs a single `hg debugdiscovery`
# subprocess at a time.
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))

# Seconds a blocking queue operation waits before re-checking `done`, so that
# neither the producer nor the workers can sleep through a shutdown request.
POLL_INTERVAL = 0.5

# The kinds of repository subset `pick_one` can generate.
SLICING = ('scratch', 'randomantichain', 'rev')


def nb_revs(repo_path):
    """Return the revision number of `tip` in the repository at `repo_path`.

    Runs `hg log --rev tip --template {rev}` and parses its output; raises
    ValueError when that output is not an integer.
    """
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
        out, _ = proc.communicate()
    return int(out)


def pick_one(repo):
    """Pick a random subset description for `repo`.

    `repo` is a `(path, tip_rev)` pair. The result is one of:

    - ('scratch', nb, seed): nb drawn between 30% and 70% of tip_rev
    - ('randomantichain', seed)
    - ('rev', rev): rev drawn between 30% of tip_rev and tip_rev
    """
    pick = random.choice(SLICING)
    # always drawn, even when unused, so the random stream does not depend on
    # which kind was picked
    seed = random.randint(0, 100000)
    if pick == 'scratch':
        start = int(repo[1] * 0.3)
        end = int(repo[1] * 0.7)
        nb = random.randint(start, end)
        return ('scratch', nb, seed)
    elif pick == 'randomantichain':
        return ('randomantichain', seed)
    elif pick == 'rev':
        start = int(repo[1] * 0.3)
        end = int(repo[1])
        rev = random.randint(start, end)
        return ('rev', rev)
    # not an assert: those are stripped when running with -O
    raise ValueError('unknown slicing kind: %r' % (pick,))


# set once the user asked the search to stop (SIGINT)
done = threading.Event()
# cases waiting to be processed: (repo, left, right) tuples, or None to tell
# a worker to exit. Bounded so that the generator cannot run far ahead.
cases = queue.Queue(maxsize=10 * JOB)
# (case, debugdiscovery-result) pairs produced by the workers
results = queue.Queue()


def _run_case(case):
    """Process one case, queueing its result or reporting the failure."""
    try:
        res = process(case)
    except Exception as exc:
        # best effort: one broken case must not kill the worker
        print('processing-failed: %s %s' % (case, exc), file=sys.stderr)
    else:
        results.put((case, res))


def worker():
    """Consume cases from `cases` until shutdown or a `None` sentinel.

    Every case is run twice: as generated, then with the local and remote
    sides swapped.

    NOTE(review): the original indentation was lost; running the swapped
    case unconditionally (rather than only after a failure) is the reading
    assumed here -- confirm against the upstream script.
    """
    while not done.is_set():
        try:
            # time out regularly so a missed sentinel cannot hang the thread
            c = cases.get(timeout=POLL_INTERVAL)
        except queue.Empty:
            continue
        if c is None:
            return
        _run_case(c)
        if done.is_set():
            # do not spawn another hg process once shutdown was requested
            return
        _run_case((c[0], c[2], c[1]))


SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')


CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    'extensions.subset=%s' % SUBSET_PATH,
)
#  '--local-as-revs "$left" --local-as-revs "$right"'
#   > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
# )


def to_revsets(case):
    """Convert a subset description from `pick_one` into a revset string."""
    t = case[0]
    if t == 'scratch':
        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
    elif t == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    elif t == 'rev':
        return '::%d' % case[1]
    # not an assert: those are stripped when running with -O
    raise ValueError('unknown slicing kind: %r' % (t,))


def process(case):
    """Run `hg debugdiscovery` for `case` and return its first JSON entry.

    `case` is a `(repo, left, right)` tuple where `repo` is a
    `(path, tip_rev)` pair and `left`/`right` are subset descriptions used as
    the local and remote side respectively.
    """
    (repo, left, right) = case
    cmd = list(CMD_BASE)
    cmd.append('-R')
    cmd.append(repo[0])
    cmd.append('--local-as-revs')
    cmd.append(to_revsets(left))
    cmd.append('--remote-as-revs')
    cmd.append(to_revsets(right))
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
        out, _ = proc.communicate()
    return json.loads(out)[0]


def interesting_boundary(res):
    """check if a case is interesting or not

    For now we are mostly interested in cases where we do multiple
    roundtrips and where the boundary is somewhere in the middle of the
    undecided set.

    Ideally, we would make this configurable, but this is not a focus for now

    return None or (round-trip, undecided-common, undecided-missing)
    """
    roundtrips = res["total-roundtrips"]
    if roundtrips <= 1:
        return None
    undecided_common = res["nb-ini_und-common"]
    undecided_missing = res["nb-ini_und-missing"]
    if undecided_common == 0:
        return None
    if undecided_missing == 0:
        return None
    return (roundtrips, undecided_common, undecided_missing)


def end(*args, **kwargs):
    """Request shutdown; usable directly as a signal handler."""
    done.set()


def format_case(case):
    """Return the items of `case` joined with '-' (e.g. 'rev-1234')."""
    return '-'.join(str(s) for s in case)


def _submit(case):
    """Queue `case` for the workers.

    Return True once queued, False if shutdown was requested first. A plain
    blocking `put` could wait forever on a full queue after every worker has
    already exited, so wait in bounded steps instead.
    """
    while not done.is_set():
        try:
            cases.put(case, timeout=POLL_INTERVAL)
        except queue.Full:
            continue
        return True
    return False


def _report_results():
    """Print every interesting result currently available in `results`."""
    while True:
        try:
            c, res = results.get_nowait()
        except queue.Empty:
            return
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()


def main(argv=None):
    """Generate and process random discovery cases until interrupted.

    `argv` is the list of repository paths (defaults to `sys.argv[1:]`).
    Returns the process exit code.
    """
    repo_paths = sys.argv[1:] if argv is None else list(argv)
    if not repo_paths:
        print('usage: search-discovery-case REPO [REPO]...', file=sys.stderr)
        return 2
    repos = [(path, nb_revs(path)) for path in repo_paths]

    signal.signal(signal.SIGINT, end)

    threads = [threading.Thread(target=worker) for _ in range(JOB)]
    for t in threads:
        t.start()

    nb_cases = 0
    while not done.is_set():
        repo = random.choice(repos)
        left = pick_one(repo)
        right = pick_one(repo)
        if not _submit((repo, left, right)):
            break
        _report_results()

        nb_cases += 1
        if not nb_cases % 100:
            print('[%d cases generated]' % nb_cases, file=sys.stderr)

    # Wake idle workers right away. Busy workers notice `done` on their own,
    # so a full queue is not a problem here.
    for _ in threads:
        try:
            cases.put_nowait(None)
        except queue.Full:
            break
    for t in threads:
        t.join()
    # report what the workers finished while we were shutting down
    _report_results()

    print('[%d cases generated]' % nb_cases, file=sys.stderr)
    print('[output generation is over]', file=sys.stderr)
    return 0


if __name__ == '__main__':
    sys.exit(main())