perf-helper: add a small extension with revsets to select repository subset
author Pierre-Yves David <pierre-yves.david@octobus.net>
Mon, 15 Mar 2021 16:35:54 +0100
changeset 46766 cb70dabe5718
parent 46765 62c2857a174b
child 46767 36b4640ccb6a
perf-helper: add a small extension with revsets to select repository subset Playing with discovery requires building interesting cases. To do this, we need revsets that help generate them. We start with a quite simple one. See the documentation for details. Differential Revision: https://phab.mercurial-scm.org/D10221
contrib/perf-utils/subsetmaker.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/perf-utils/subsetmaker.py	Mon Mar 15 16:35:54 2021 +0100
@@ -0,0 +1,94 @@
+"""revset to select sample of repository
+
+Hopefully this is useful to create interesting discovery cases.
+"""
+
+import collections
+import random
+
+from mercurial.i18n import _
+
+from mercurial import (
+    registrar,
+    revset,
+    revsetlang,
+    smartset,
+)
+
+revsetpredicate = registrar.revsetpredicate()  # revset registration decorator
+
+
+@revsetpredicate(b'scratch(REVS, <count>, [seed])')
+def scratch(repo, subset, x):
+    """randomly select <count> revisions from the top of REVS
+
+    This subset is created by recursively picking changesets starting from
+    the heads. It can be summarized using the following algorithm::
+
+        selected = set()
+        for i in range(<count>):
+            unselected = repo.revs("not <selected>")
+            candidates = repo.revs("heads(<unselected>)")
+            pick = random.choice(candidates)
+            selected.add(pick)
+
+    Picking stops early once no candidate is left. When [seed] is given,
+    picks come from a private generator seeded with it; otherwise the
+    module-level random generator is used.
+    """
+    m = _(b"scratch expects revisions, count argument and an optional seed")
+    args = revsetlang.getargs(x, 2, 3, m)
+    if len(args) == 3:
+        x, n, seed = args
+        seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
+        rand = random.Random(seed)
+    else:
+        # only two arguments: getargs() was asked for 2 to 3 of them
+        x, n = args
+        rand = random
+
+    n = revsetlang.getinteger(n, _(b"scratch expects a number"))
+
+    heads = set()
+    # per revision, how many of its children were seen while scanning REVS
+    children_count = collections.defaultdict(int)
+    parents = repo.changelog._uncheckedparentrevs
+
+    # scan REVS in sorted order; a revision stays in `heads` until a later
+    # revision names it as a parent
+    baseset = revset.getset(repo, smartset.fullreposet(repo), x)
+    baseset.sort()
+    for r in baseset:
+        heads.add(r)
+        for p in parents(r):
+            if p >= 0:
+                # NOTE(review): p is counted even when it is not in REVS, so
+                # it can become a candidate later -- confirm this is intended
+                heads.discard(p)
+                children_count[p] += 1
+
+    # sanity check; .get() avoids inserting a key per head as a side effect
+    for h in heads:
+        assert children_count.get(h, 0) == 0
+
+    selected = set()
+    # `_` is the i18n function here, so the unused counter needs another name
+    for _step in range(n):
+        if not heads:
+            break
+        # random.choice() needs a sequence, hence the list
+        pick = rand.choice(list(heads))
+        heads.remove(pick)
+        assert pick not in selected
+        selected.add(pick)
+        for p in parents(pick):
+            if p not in children_count:
+                continue
+            children_count[p] -= 1
+            assert children_count[p] >= 0
+            if children_count[p] == 0:
+                # all counted children of p are selected: p is a candidate
+                assert p not in selected, (pick, p)
+                heads.add(p)
+
+    return smartset.baseset(selected) & subset