Mercurial > hg
view rust/hg-core/tests/test_missing_ancestors.rs @ 48817:c1ad4364a5b5
cext: use PyLong symbols
We no longer need to support Python 2. So use the Python 3 symbol
names directly.
Differential Revision: https://phab.mercurial-scm.org/D12228
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sun, 20 Feb 2022 15:48:35 -0700 |
parents | 0dc698c91ca0 |
children | 58074252db3c |
line wrap: on
line source
use hg::testing::VecGraph; use hg::Revision; use hg::*; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng, RngCore, SeedableRng}; use rand_distr::LogNormal; use std::cmp::min; use std::collections::HashSet; use std::env; use std::fmt::Debug; fn build_random_graph( nodes_opt: Option<usize>, rootprob_opt: Option<f64>, mergeprob_opt: Option<f64>, prevprob_opt: Option<f64>, ) -> VecGraph { let nodes = nodes_opt.unwrap_or(100); let rootprob = rootprob_opt.unwrap_or(0.05); let mergeprob = mergeprob_opt.unwrap_or(0.2); let prevprob = prevprob_opt.unwrap_or(0.7); let mut rng = thread_rng(); let mut vg: VecGraph = Vec::with_capacity(nodes); for i in 0..nodes { if i == 0 || rng.gen_bool(rootprob) { vg.push([NULL_REVISION, NULL_REVISION]) } else if i == 1 { vg.push([0, NULL_REVISION]) } else if rng.gen_bool(mergeprob) { let p1 = { if i == 2 || rng.gen_bool(prevprob) { (i - 1) as Revision } else { rng.gen_range(0..i - 1) as Revision } }; // p2 is a random revision lower than i and different from p1 let mut p2 = rng.gen_range(0..i - 1) as Revision; if p2 >= p1 { p2 = p2 + 1; } vg.push([p1, p2]); } else if rng.gen_bool(prevprob) { vg.push([(i - 1) as Revision, NULL_REVISION]) } else { vg.push([rng.gen_range(0..i - 1) as Revision, NULL_REVISION]) } } vg } /// Compute the ancestors set of all revisions of a VecGraph fn ancestors_sets(vg: &VecGraph) -> Vec<HashSet<Revision>> { let mut ancs: Vec<HashSet<Revision>> = Vec::new(); for i in 0..vg.len() { let mut ancs_i = HashSet::new(); ancs_i.insert(i as Revision); for p in vg[i].iter().cloned() { if p != NULL_REVISION { ancs_i.extend(&ancs[p as usize]); } } ancs.push(ancs_i); } ancs } #[derive(Clone, Debug)] enum MissingAncestorsAction { InitialBases(HashSet<Revision>), AddBases(HashSet<Revision>), RemoveAncestorsFrom(HashSet<Revision>), MissingAncestors(HashSet<Revision>), } /// An instrumented naive yet obviously correct implementation /// /// It also records all its actions for easy reproduction for replay /// of problematic cases struct NaiveMissingAncestors<'a> { ancestors_sets: &'a Vec<HashSet<Revision>>, graph: &'a VecGraph, // used for error reporting only bases: HashSet<Revision>, history: Vec<MissingAncestorsAction>, // for error reporting, assuming we are in a random test random_seed: String, } impl<'a> NaiveMissingAncestors<'a> { fn new( graph: &'a VecGraph, ancestors_sets: &'a Vec<HashSet<Revision>>, bases: &HashSet<Revision>, random_seed: &str, ) -> Self { Self { ancestors_sets: ancestors_sets, bases: bases.clone(), graph: graph, history: vec![MissingAncestorsAction::InitialBases(bases.clone())], random_seed: random_seed.into(), } } fn add_bases(&mut self, new_bases: HashSet<Revision>) { self.bases.extend(&new_bases); self.history .push(MissingAncestorsAction::AddBases(new_bases)) } fn remove_ancestors_from(&mut self, revs: &mut HashSet<Revision>) { revs.remove(&NULL_REVISION); self.history .push(MissingAncestorsAction::RemoveAncestorsFrom(revs.clone())); for base in self.bases.iter().cloned() { if base != NULL_REVISION { for rev in &self.ancestors_sets[base as usize] { revs.remove(&rev); } } } } fn missing_ancestors( &mut self, revs: impl IntoIterator<Item = Revision>, ) -> Vec<Revision> { let revs_as_set: HashSet<Revision> = revs.into_iter().collect(); let mut missing: HashSet<Revision> = HashSet::new(); for rev in revs_as_set.iter().cloned() { if rev != NULL_REVISION { missing.extend(&self.ancestors_sets[rev as usize]) } } self.history .push(MissingAncestorsAction::MissingAncestors(revs_as_set)); for base in self.bases.iter().cloned() { if base != NULL_REVISION { for rev in &self.ancestors_sets[base as usize] { missing.remove(&rev); } } } let mut res: Vec<Revision> = missing.iter().cloned().collect(); res.sort(); res } fn assert_eq<T>(&self, left: T, right: T) where T: PartialEq + Debug, { if left == right { return; } panic!( "Equality assertion failed (left != right) left={:?} right={:?} graph={:?} current bases={:?} history={:?} random seed={} ", left, right, self.graph, self.bases, self.history, self.random_seed, ); } } /// Choose a set of random revisions /// /// The size of the set is taken from a LogNormal distribution /// with default mu=1.1 and default sigma=0.8. Quoting the Python /// test this is taken from: /// the default mu and sigma give us a nice distribution of mostly /// single-digit counts (including 0) with some higher ones /// The sample may include NULL_REVISION fn sample_revs<R: RngCore>( rng: &mut R, maxrev: Revision, mu_opt: Option<f64>, sigma_opt: Option<f64>, ) -> HashSet<Revision> { let mu = mu_opt.unwrap_or(1.1); let sigma = sigma_opt.unwrap_or(0.8); let log_normal = LogNormal::new(mu, sigma).unwrap(); let nb = min(maxrev as usize, log_normal.sample(rng).floor() as usize); let dist = Uniform::from(NULL_REVISION..maxrev); return rng.sample_iter(&dist).take(nb).collect(); } /// Produces the hexadecimal representation of a slice of bytes fn hex_bytes(bytes: &[u8]) -> String { let mut s = String::with_capacity(bytes.len() * 2); for b in bytes { s.push_str(&format!("{:x}", b)); } s } /// Fill a random seed from its hexadecimal representation. /// /// This signature is meant to be consistent with `RngCore::fill_bytes` fn seed_parse_in(hex: &str, seed: &mut [u8]) { if hex.len() != 32 { panic!("Seed {} is too short for 128 bits hex", hex); } for i in 0..8 { seed[i] = u8::from_str_radix(&hex[2 * i..2 * (i + 1)], 16) .unwrap_or_else(|_e| panic!("Seed {} is not 128 bits hex", hex)); } } /// Parse the parameters for `test_missing_ancestors()` /// /// Returns (graphs, instances, calls per instance) fn parse_test_missing_ancestors_params(var: &str) -> (usize, usize, usize) { let err_msg = "TEST_MISSING_ANCESTORS format: GRAPHS,INSTANCES,CALLS"; let params: Vec<usize> = var .split(',') .map(|n| n.trim().parse().expect(err_msg)) .collect(); if params.len() != 3 { panic!("{}", err_msg); } (params[0], params[1], params[2]) } #[test] /// This test creates lots of random VecGraphs, /// and compare a bunch of MissingAncestors for them with /// NaiveMissingAncestors that rely on precomputed transitive closures of /// these VecGraphs (ancestors_sets). /// /// For each generater graph, several instances of `MissingAncestors` are /// created, whose methods are called and checked a given number of times. /// /// This test can be parametrized by two environment variables: /// /// - TEST_RANDOM_SEED: must be 128 bits in hexadecimal /// - TEST_MISSING_ANCESTORS: "GRAPHS,INSTANCES,CALLS". The default is /// "100,10,10" /// /// This is slow: it runs on my workstation in about 5 seconds with the /// default parameters with a plain `cargo --test`. /// /// If you want to run it faster, especially if you're changing the /// parameters, use `cargo test --release`. /// For me, that gets it down to 0.15 seconds with the default parameters fn test_missing_ancestors_compare_naive() { let (graphcount, testcount, inccount) = match env::var("TEST_MISSING_ANCESTORS") { Err(env::VarError::NotPresent) => (100, 10, 10), Ok(val) => parse_test_missing_ancestors_params(&val), Err(env::VarError::NotUnicode(_)) => { panic!("TEST_MISSING_ANCESTORS is invalid"); } }; let mut seed: [u8; 16] = [0; 16]; match env::var("TEST_RANDOM_SEED") { Ok(val) => { seed_parse_in(&val, &mut seed); } Err(env::VarError::NotPresent) => { thread_rng().fill_bytes(&mut seed); } Err(env::VarError::NotUnicode(_)) => { panic!("TEST_RANDOM_SEED must be 128 bits in hex"); } } let hex_seed = hex_bytes(&seed); eprintln!("Random seed: {}", hex_seed); let mut rng = rand_pcg::Pcg32::from_seed(seed); eprint!("Checking MissingAncestors against brute force implementation "); eprint!("for {} random graphs, ", graphcount); eprintln!( "with {} instances for each and {} calls per instance", testcount, inccount, ); for g in 0..graphcount { if g != 0 && g % 100 == 0 { eprintln!("Tested with {} graphs", g); } let graph = build_random_graph(None, None, None, None); let graph_len = graph.len() as Revision; let ancestors_sets = ancestors_sets(&graph); for _testno in 0..testcount { let bases: HashSet<Revision> = sample_revs(&mut rng, graph_len, None, None); let mut inc = MissingAncestors::<VecGraph>::new( graph.clone(), bases.clone(), ); let mut naive = NaiveMissingAncestors::new( &graph, &ancestors_sets, &bases, &hex_seed, ); for _m in 0..inccount { if rng.gen_bool(0.2) { let new_bases = sample_revs(&mut rng, graph_len, None, None); inc.add_bases(new_bases.iter().cloned()); naive.add_bases(new_bases); } if rng.gen_bool(0.4) { // larger set so that there are more revs to remove from let mut hrevs = sample_revs(&mut rng, graph_len, Some(1.5), None); let mut rrevs = hrevs.clone(); inc.remove_ancestors_from(&mut hrevs).unwrap(); naive.remove_ancestors_from(&mut rrevs); naive.assert_eq(hrevs, rrevs); } else { let revs = sample_revs(&mut rng, graph_len, None, None); let hm = inc.missing_ancestors(revs.iter().cloned()).unwrap(); let rm = naive.missing_ancestors(revs.iter().cloned()); naive.assert_eq(hm, rm); } } } } }