Mercurial > hg
comparison contrib/perf-utils/search-discovery-case @ 46771:6b26e6432554
perf-helpers: add a search-discovery-case script
This is a small script I built to look for interesting discovery cases. It is fairly
basic but could be useful in various situations, so let's put it in the main
repository.
Differential Revision: https://phab.mercurial-scm.org/D10225
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 15 Mar 2021 18:01:42 +0100 |
parents | |
children | 6000f5b25c9b |
comparison
equal
deleted
inserted
replaced
46770:b6ac612445e0 | 46771:6b26e6432554 |
---|---|
1 #!/usr/bin/env python3 | |
2 # Search for interesting discovery instance | |
3 # | |
4 # search-discovery-case REPO [REPO]… | |
5 # | |
6 # This uses a subsetmaker extension (next to this script) to generate a stream of | |
7 # random discovery instances. When interesting cases are discovered, information | |
8 # about them is printed on stdout. | |
9 from __future__ import print_function | |
10 | |
11 import json | |
12 import os | |
13 import queue | |
14 import random | |
15 import signal | |
16 import subprocess | |
17 import sys | |
18 import threading | |
19 | |
# Locate the Mercurial checkout this script ships in: the script lives in
# contrib/perf-utils/, so the checkout root is two directories above it.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads: the value of the NUMBER_OF_PROCESSORS environment
# variable when it is set, 8 otherwise.
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))


# The kinds of subset description a discovery case side can be built from.
SLICING = ('scratch', 'randomantichain', 'rev')
30 | |
31 | |
def nb_revs(repo_path):
    """Return the revision number of `tip` in the repository at `repo_path`.

    The number is read from the output of `hg log --rev tip` run through
    HG_BIN with a `{rev}` template.

    Raises subprocess.CalledProcessError when the hg command exits with a
    non-zero status, so an unusable repository path is reported as a command
    failure instead of a ValueError from parsing empty output.
    """
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    # check_output returns the captured stdout and checks the exit status
    out = subprocess.check_output(cmd)
    return int(out)
46 | |
47 | |
# Every command line argument is a repository to draw cases from.  Without
# any, the generation loop has nothing to pick from, so stop right away
# (before any worker thread is started) with a usage message on stderr.
if len(sys.argv) < 2:
    sys.exit('usage: search-discovery-case REPO [REPO]...')

# (path, size) pairs; the size is what pick_one() scales its random picks to.
repos = []
for repo in sys.argv[1:]:
    repos.append((repo, nb_revs(repo)))
52 | |
53 | |
def pick_one(repo):
    """Pick a random subset description for one side of a discovery case.

    `repo` is a (path, size) pair as stored in `repos`; only the size is used.

    Returns one of:

    - ('scratch', nb, seed): nb drawn between 30% and 70% of the size,
    - ('randomantichain', seed),
    - ('rev', rev): rev drawn between 30% of the size and the size.

    The seed is a random integer between 0 and 100000.

    Raises AssertionError if SLICING contains a kind that is not handled here.
    """
    size = repo[1]
    pick = random.choice(SLICING)
    # the seed is always drawn, even for kinds that do not use it
    seed = random.randint(0, 100000)
    if pick == 'scratch':
        nb = random.randint(int(size * 0.3), int(size * 0.7))
        return ('scratch', nb, seed)
    if pick == 'randomantichain':
        return ('randomantichain', seed)
    if pick == 'rev':
        rev = random.randint(int(size * 0.3), int(size))
        return ('rev', rev)
    # explicit raise rather than `assert False`, which `python -O` strips
    raise AssertionError('unknown slicing kind: %r' % (pick,))
71 | |
72 | |
# Set when generation must stop (the SIGINT handler sets it).
done = threading.Event()
# Cases waiting for a worker; bounded to 10 entries per worker thread.
cases = queue.Queue(10 * JOB)
# Processed cases, stored as (case, result) pairs.
results = queue.Queue()
76 | |
77 | |
def worker():
    """Thread body: process cases from `cases` until told to stop.

    Each case is run twice, as received and then with its two sides swapped;
    every successful run is pushed to `results` as a (case, result) pair.
    A failing run is reported on stderr and does not stop the worker.

    The loop ends when `done` is set or when a None sentinel is received.
    """
    # NOTE(review): assumes the swapped run always happens, not only after a
    # failure of the first one -- confirm against the upstream script.
    while not done.is_set():
        case = cases.get()
        if case is None:
            return
        for swapped in (False, True):
            if swapped:
                case = (case[0], case[2], case[1])
            try:
                res = process(case)
                results.put((case, res))
            except Exception as exc:
                print(
                    'processing-failed: %s %s' % (case, exc), file=sys.stderr
                )
95 | |
# Path of the subsetmaker extension inside the Mercurial checkout.
SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')


# Leading arguments shared by every `hg debugdiscovery` run: JSON output,
# with subsetmaker loaded as the `subset` extension.
CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    'extensions.subset=' + SUBSET_PATH,
)
107 # '--local-as-revs "$left" --local-as-revs "$right"' | |
108 # > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt | |
109 # ) | |
110 | |
111 | |
def to_revsets(case):
    """Return the revset expression for one side of a discovery case.

    `case` is a tuple whose first item names the kind of subset:

    - ('scratch', nb, seed)      -> 'not scratch(all(), <nb>, "<seed>")'
    - ('randomantichain', seed)  -> '::randomantichain(all(), "<seed>")'
    - ('rev', rev)               -> '::<rev>'

    Raises AssertionError for any other kind.
    """
    kind = case[0]
    if kind == 'scratch':
        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
    if kind == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    if kind == 'rev':
        return '::%d' % case[1]
    # explicit raise rather than `assert False`, which `python -O` strips
    # (the function would then silently return None)
    raise AssertionError('unknown case kind: %r' % (kind,))
122 | |
123 | |
def process(case):
    """Run `hg debugdiscovery` for `case` and return its decoded output.

    `case` is a (repo, left, right) triple: repo[0] is the repository path,
    `left` and `right` are turned into revsets with to_revsets() and passed
    as --local-as-revs and --remote-as-revs respectively.

    Returns the first item of the JSON document printed by the command.
    """
    repo, left, right = case
    cmd = list(CMD_BASE) + [
        '-R',
        repo[0],
        '--local-as-revs',
        to_revsets(left),
        '--remote-as-revs',
        to_revsets(right),
    ]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out = proc.communicate()[0]
    return json.loads(out)[0]
136 | |
137 | |
def interesting_boundary(res):
    """Tell whether a discovery result is worth reporting.

    For now, a result is interesting when the discovery took more than one
    round-trip and the initial undecided set contains both common and
    missing revisions.

    Ideally this would be configurable, but that is not a focus for now.

    Return None, or (round-trips, undecided-common, undecided-missing).
    """
    nb_roundtrips = res["total-roundtrips"]
    if nb_roundtrips <= 1:
        return None
    und_common = res["nb-ini_und-common"]
    und_missing = res["nb-ini_und-missing"]
    if und_common == 0 or und_missing == 0:
        return None
    return (nb_roundtrips, und_common, und_missing)
158 | |
159 | |
def end(*args, **kwargs):
    """Request the end of the case generation by setting `done`.

    Any argument is accepted and ignored, which lets this function be used
    directly as a signal handler.
    """
    done.set()
162 | |
163 | |
def format_case(case):
    """Return the items of `case` as strings joined with '-'."""
    return '-'.join(map(str, case))
166 | |
167 | |
# Ctrl-C requests a clean stop instead of killing the script.
signal.signal(signal.SIGINT, end)

for i in range(JOB):
    threading.Thread(target=worker).start()

nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    case = (repo, left, right)
    # Workers stop taking cases once `done` is set, so a plain blocking put
    # on a full queue could wait forever after an interruption.  Wake up
    # regularly to notice the stop request instead.
    queued = False
    while not (queued or done.is_set()):
        try:
            cases.put(case, timeout=1)
            queued = True
        except queue.Full:
            pass
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()

    # only count the cases that actually reached the queue
    if queued:
        nb_cases += 1
        if not nb_cases % 100:
            print('[%d cases generated]' % nb_cases, file=sys.stderr)

# Wake up workers blocked on an empty queue.  A full queue is fine: it means
# every worker has something to fetch and will see `done` afterwards.
for i in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

print('[%d cases generated]' % nb_cases, file=sys.stderr)
# this message has no placeholder, so it must not be %-formatted
print('[ouput generation is over]', file=sys.stderr)
198 print('[ouput generation is over]' % nb_cases, file=sys.stderr) |