filecache: abstract the fetching of the list of tracked file
We will need it for a coming fix that will requires to check a variable list of
file for the changelog.
Differential Revision: https://phab.mercurial-scm.org/D11479
"""revset to select sample of repository
Hopefully this is useful to create interesting discovery cases.
"""
import collections
import random
from mercurial.i18n import _
from mercurial import (
registrar,
revset,
revsetlang,
smartset,
)
revsetpredicate = registrar.revsetpredicate()
@revsetpredicate(b'subsetspec("<spec>")')
def subsetmarkerspec(repo, subset, x):
"""use a shorthand spec as used by search-discovery-case
Supported format are:
- "scratch-count-seed": not scratch(all(), count, "seed")
- "randomantichain-seed": ::randomantichain(all(), "seed")
- "rev-REV": "::REV"
"""
args = revsetlang.getargs(
x, 0, 1, _(b'subsetspec("spec") required an argument')
)
spec = revsetlang.getstring(args[0], _(b"spec should be a string"))
case = spec.split(b'-')
t = case[0]
if t == b'scratch':
spec_revset = b'not scratch(all(), %s, "%s")' % (case[1], case[2])
elif t == b'randomantichain':
spec_revset = b'::randomantichain(all(), "%s")' % case[1]
elif t == b'rev':
spec_revset = b'::%d' % case[1]
else:
assert False, spec
selected = repo.revs(spec_revset)
return selected & subset
@revsetpredicate(b'scratch(REVS, <count>, [seed])')
def scratch(repo, subset, x):
"""randomly remove <count> revision from the repository top
This subset is created by recursively picking changeset starting from the
heads. It can be summarized using the following algorithm::
selected = set()
for i in range(<count>):
unselected = repo.revs("not <selected>")
candidates = repo.revs("heads(<unselected>)")
pick = random.choice(candidates)
selected.add(pick)
"""
m = _(b"scratch expects revisions, count argument and an optional seed")
args = revsetlang.getargs(x, 2, 3, m)
if len(args) == 2:
x, n = args
rand = random
elif len(args) == 3:
x, n, seed = args
seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
rand = random.Random(seed)
else:
assert False
n = revsetlang.getinteger(n, _(b"scratch expects a number"))
selected = set()
heads = set()
children_count = collections.defaultdict(lambda: 0)
parents = repo.changelog._uncheckedparentrevs
baseset = revset.getset(repo, smartset.fullreposet(repo), x)
baseset.sort()
for r in baseset:
heads.add(r)
p1, p2 = parents(r)
if p1 >= 0:
heads.discard(p1)
children_count[p1] += 1
if p2 >= 0:
heads.discard(p2)
children_count[p2] += 1
for h in heads:
assert children_count[h] == 0
selected = set()
for x in range(n):
if not heads:
break
pick = rand.choice(list(heads))
heads.remove(pick)
assert pick not in selected
selected.add(pick)
p1, p2 = parents(pick)
if p1 in children_count:
assert p1 in children_count
children_count[p1] -= 1
assert children_count[p1] >= 0
if children_count[p1] == 0:
assert p1 not in selected, (r, p1)
heads.add(p1)
if p2 in children_count:
assert p2 in children_count
children_count[p2] -= 1
assert children_count[p2] >= 0
if children_count[p2] == 0:
assert p2 not in selected, (r, p2)
heads.add(p2)
return smartset.baseset(selected) & subset
@revsetpredicate(b'randomantichain(REVS, [seed])')
def antichain(repo, subset, x):
"""Pick a random anti-chain in the repository
A antichain is a set of changeset where there isn't any element that is
either a descendant or ancestors of any other element in the set. In other
word, all the elements are independant. It can be summarized with the
following algorithm::
selected = set()
unselected = repo.revs('all()')
while unselected:
pick = random.choice(unselected)
selected.add(pick)
unselected -= repo.revs('::<pick> + <pick>::')
"""
args = revsetlang.getargs(
x, 1, 2, _(b"randomantichain expects revisions and an optional seed")
)
if len(args) == 1:
(x,) = args
rand = random
elif len(args) == 2:
x, seed = args
seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
rand = random.Random(seed)
else:
assert False
selected = set()
baseset = revset.getset(repo, smartset.fullreposet(repo), x)
undecided = baseset
while undecided:
pick = rand.choice(list(undecided))
selected.add(pick)
undecided = repo.revs(
'%ld and not (::%ld or %ld::head())', baseset, selected, selected
)
return smartset.baseset(selected) & subset