comparison rust/hg-core/src/ancestors.rs @ 40271:dbc28c91f7ff

rust: pure Rust lazyancestors iterator This is the first of a patch series aiming to provide an alternative implementation in the Rust programming language of the _lazyancestorsiter from the ancestor module. This iterator has been brought to our attention by the people at Octobus, as a potential good candidate for incremental "oxydation" (rewriting in Rust), because it has shown performance issues lately and it merely deals with ints (revision numbers) obtained by calling the index, whih should be directly callable from Rust code, being itself implemented as a C extension. The idea behind this series is to provide a minimal example of Rust code collaborating with existing C and Python code. To open the way to gradually rewriting more of Mercurial's Python code in Rust, without being forced to pay a large initial cost of rewriting the existing fast core into Rust. This patch does not introduce any bindings to other Mercurial code yet. Instead, it introduces the necessary abstractions to address the problem independently, and unit-test it. Since this is the first use of Rust as a Python module within Mercurial, the hg-core crate gets created within this patch. See its Cargo.toml for more details. Someone with a rustc/cargo installation may chdir into rust/hg-core and run the tests by issuing: cargo test --lib The algorithm is a bit simplified (see details in docstrings), and at its simplest becomes rather trivial, showcasing that Rust has batteries included too: BinaryHeap, the Rust analog of Python's heapq does actually all the work. The implementation can be further optimized and probably be made more idiomatic Rust.
author Georges Racinet <gracinet@anybox.fr>
date Thu, 27 Sep 2018 17:03:16 +0200
parents
children 72b94f946e90
comparison
equal deleted inserted replaced
40270:8783710b1d58 40271:dbc28c91f7ff
1 // ancestors.rs
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 use std::collections::{BinaryHeap, HashSet};
12
13 /// Iterator over the ancestors of a given list of revisions
14 /// This is a generic type, defined and implemented for any Graph, so that
15 /// it's easy to
16 ///
17 /// - unit test in pure Rust
18 /// - bind to main Mercurial code, potentially in several ways and have these
19 /// bindings evolve over time
20 pub struct AncestorsIterator<G: Graph> {
21 graph: G,
22 visit: BinaryHeap<Revision>,
23 seen: HashSet<Revision>,
24 stoprev: Revision,
25 }
26
27 impl<G: Graph> AncestorsIterator<G> {
28 /// Constructor.
29 ///
30 /// if `inclusive` is true, then the init revisions are emitted in
31 /// particular, otherwise iteration starts from their parents.
32 pub fn new<I>(
33 graph: G,
34 initrevs: I,
35 stoprev: Revision,
36 inclusive: bool,
37 ) -> Result<Self, GraphError>
38 where
39 I: IntoIterator<Item = Revision>,
40 {
41 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
42 if inclusive {
43 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
44 let seen = visit.iter().map(|&x| x).collect();
45 return Ok(AncestorsIterator {
46 visit: visit,
47 seen: seen,
48 stoprev: stoprev,
49 graph: graph,
50 });
51 }
52 let mut this = AncestorsIterator {
53 visit: BinaryHeap::new(),
54 seen: HashSet::new(),
55 stoprev: stoprev,
56 graph: graph,
57 };
58 this.seen.insert(NULL_REVISION);
59 for rev in filtered_initrevs {
60 this.conditionally_push_parents(rev)?;
61 }
62 Ok(this)
63 }
64
65 #[inline]
66 fn conditionally_push_rev(&mut self, rev: Revision) {
67 if self.stoprev <= rev && !self.seen.contains(&rev) {
68 self.seen.insert(rev);
69 self.visit.push(rev);
70 }
71 }
72
73 #[inline]
74 fn conditionally_push_parents(
75 &mut self,
76 rev: Revision,
77 ) -> Result<(), GraphError> {
78 let parents = self.graph.parents(rev)?;
79 self.conditionally_push_rev(parents.0);
80 self.conditionally_push_rev(parents.1);
81 Ok(())
82 }
83 }
84
85 /// Main implementation.
86 ///
87 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
88 /// with a few non crucial differences:
89 ///
90 /// - there's no filtering of invalid parent revisions. Actually, it should be
91 /// consistent and more efficient to filter them from the end caller.
92 /// - we don't use the equivalent of `heapq.heapreplace()`, but we should, for
93 /// the same reasons (using `peek_mut`)
94 /// - we don't have the optimization for adjacent revs (case where p1 == rev-1)
95 /// - we save a few pushes by comparing with `stoprev` before pushing
96 ///
97 /// Error treatment:
98 /// We swallow the possible GraphError of conditionally_push_parents() to
99 /// respect the Iterator trait in a simple manner: never emitting parents
100 /// for the returned revision. We finds this good enough for now, because:
101 ///
102 /// - there's a good chance that invalid revisionss are fed from the start,
103 /// and `new()` doesn't swallow the error result.
104 /// - this is probably what the Python implementation produces anyway, due
105 /// to filtering at each step, and Python code is currently the only
106 /// concrete caller we target, so we shouldn't need a finer error treatment
107 /// for the time being.
108 impl<G: Graph> Iterator for AncestorsIterator<G> {
109 type Item = Revision;
110
111 fn next(&mut self) -> Option<Revision> {
112 let current = match self.visit.pop() {
113 None => {
114 return None;
115 }
116 Some(i) => i,
117 };
118 self.conditionally_push_parents(current).unwrap_or(());
119 Some(current)
120 }
121 }
122
123 #[cfg(test)]
124 mod tests {
125
126 use super::*;
127
128 #[derive(Clone, Debug)]
129 struct Stub;
130
131 /// This is the same as the dict from test-ancestors.py
132 impl Graph for Stub {
133 fn parents(
134 &self,
135 rev: Revision,
136 ) -> Result<(Revision, Revision), GraphError> {
137 match rev {
138 0 => Ok((-1, -1)),
139 1 => Ok((0, -1)),
140 2 => Ok((1, -1)),
141 3 => Ok((1, -1)),
142 4 => Ok((2, -1)),
143 5 => Ok((4, -1)),
144 6 => Ok((4, -1)),
145 7 => Ok((4, -1)),
146 8 => Ok((-1, -1)),
147 9 => Ok((6, 7)),
148 10 => Ok((5, -1)),
149 11 => Ok((3, 7)),
150 12 => Ok((9, -1)),
151 13 => Ok((8, -1)),
152 r => Err(GraphError::ParentOutOfRange(r)),
153 }
154 }
155 }
156
157 fn list_ancestors<G: Graph>(
158 graph: G,
159 initrevs: Vec<Revision>,
160 stoprev: Revision,
161 inclusive: bool,
162 ) -> Vec<Revision> {
163 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
164 .unwrap()
165 .collect()
166 }
167
168 #[test]
169 /// Same tests as test-ancestor.py, without membership
170 /// (see also test-ancestor.py.out)
171 fn test_list_ancestor() {
172 assert_eq!(list_ancestors(Stub, vec![], 0, false), vec![]);
173 assert_eq!(
174 list_ancestors(Stub, vec![11, 13], 0, false),
175 vec![8, 7, 4, 3, 2, 1, 0]
176 );
177 assert_eq!(list_ancestors(Stub, vec![1, 3], 0, false), vec![1, 0]);
178 assert_eq!(
179 list_ancestors(Stub, vec![11, 13], 0, true),
180 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
181 );
182 assert_eq!(list_ancestors(Stub, vec![11, 13], 6, false), vec![8, 7]);
183 assert_eq!(
184 list_ancestors(Stub, vec![11, 13], 6, true),
185 vec![13, 11, 8, 7]
186 );
187 assert_eq!(list_ancestors(Stub, vec![11, 13], 11, true), vec![13, 11]);
188 assert_eq!(list_ancestors(Stub, vec![11, 13], 12, true), vec![13]);
189 assert_eq!(
190 list_ancestors(Stub, vec![10, 1], 0, true),
191 vec![10, 5, 4, 2, 1, 0]
192 );
193 }
194
195 #[test]
196 /// Corner case that's not directly in test-ancestors.py, but
197 /// that happens quite often, as demonstrated by running the whole
198 /// suite.
199 /// For instance, run tests/test-obsolete-checkheads.t
200 fn test_nullrev_input() {
201 let mut iter =
202 AncestorsIterator::new(Stub, vec![-1], 0, false).unwrap();
203 assert_eq!(iter.next(), None)
204 }
205
206 /// A corrupted Graph, supporting error handling tests
207 struct Corrupted;
208
209 impl Graph for Corrupted {
210 fn parents(
211 &self,
212 rev: Revision,
213 ) -> Result<(Revision, Revision), GraphError> {
214 match rev {
215 1 => Ok((0, -1)),
216 r => Err(GraphError::ParentOutOfRange(r)),
217 }
218 }
219 }
220
221 #[test]
222 fn test_initrev_out_of_range() {
223 // inclusive=false looks up initrev's parents right away
224 match AncestorsIterator::new(Stub, vec![25], 0, false) {
225 Ok(_) => panic!("Should have been ParentOutOfRange"),
226 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
227 }
228 }
229
230 #[test]
231 fn test_next_out_of_range() {
232 // inclusive=false looks up initrev's parents right away
233 let mut iter =
234 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
235 assert_eq!(iter.next(), Some(0));
236 assert_eq!(iter.next(), None);
237 }
238 }