comparison mercurial/dagop.py @ 32903:27932a76a88d

dagop: split module hosting DAG-related algorithms from revset This module hosts the following functions. They are somewhat similar (e.g. scanning revisions using a heap queue or stack) and seem non-trivial from an algorithmic point of view. - _revancestors() - _revdescendants() - reachableroots() - _toposort() I was thinking of adding a revset._fileancestors() generator for a better follow() implementation, but it would be called from context.py as well. So I decided to create a new module. Naming is hard. I couldn't come up with any better module name, so it's called "dag operation" now. I rejected the following candidates: - ancestor.py - existing, revlog-level DAG algorithm - ancestorset.py - doesn't always return a set - dagalgorithm.py - hard to type - dagutil.py - existing - revancestor.py - I want to add fileancestors() % wc -l mercurial/dagop.py mercurial/revset.py 339 mercurial/dagop.py 2020 mercurial/revset.py 2359 total
author Yuya Nishihara <yuya@tcha.org>
date Sun, 16 Oct 2016 18:03:24 +0900
parents mercurial/revset.py@8e02829bec61
children 582080a4a812
comparison
equal deleted inserted replaced
32902:642feee29d70 32903:27932a76a88d
1 # dagop.py - graph ancestry and topology algorithm for revset
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 import heapq
11
12 from . import (
13 error,
14 node,
15 smartset,
16 )
17
# Short local aliases for the smartset classes returned by the functions
# below (revancestors/revdescendants return generatorset, reachableroots
# returns baseset).
baseset = smartset.baseset
generatorset = smartset.generatorset
20
def revancestors(repo, revs, followfirst):
    """Like revlog.ancestors(), but supports followfirst.

    Lazily yields the ancestors of ``revs`` (including ``revs`` themselves)
    in descending revision order.  Note that ``revs`` is sorted in place
    (descending) when iteration starts.
    """
    # When following only first parents, slice the parent tuple down to its
    # first element; a cut of None keeps both parents.
    cut = 1 if followfirst else None
    cl = repo.changelog

    def iterate():
        revs.sort(reverse=True)
        inputrevs = iter(revs)
        pendingheap = []

        nextinput = next(inputrevs, None)
        if nextinput is not None:
            # Heap pops the smallest element; negate to walk highest-first.
            heapq.heappush(pendingheap, -nextinput)

        emitted = set()
        while pendingheap:
            rev = -heapq.heappop(pendingheap)
            # Merge further input revs into the heap as we walk past them,
            # so inputs and discovered parents stay interleaved in order.
            if rev == nextinput:
                nextinput = next(inputrevs, None)
                if nextinput is not None:
                    heapq.heappush(pendingheap, -nextinput)
            if rev in emitted:
                continue
            emitted.add(rev)
            yield rev
            try:
                for prev in cl.parentrevs(rev)[:cut]:
                    if prev != node.nullrev:
                        heapq.heappush(pendingheap, -prev)
            except error.WdirUnsupported:
                # The working-directory rev has no changelog entry; fall
                # back to the context object for its parents.
                for pctx in repo[rev].parents()[:cut]:
                    if pctx.rev() != node.nullrev:
                        heapq.heappush(pendingheap, -pctx.rev())

    return generatorset(iterate(), iterasc=False)
58
def revdescendants(repo, revs, followfirst):
    """Like revlog.descendants() but supports followfirst.

    Lazily yields descendants of ``revs`` in ascending revision order.
    """
    # With followfirst, only walk along first parents (slice of length 1).
    cut = 1 if followfirst else None

    def iterate():
        cl = repo.changelog
        # XXX this should be 'parentset.min()' assuming 'parentset' is a
        # smartset (and if it is not, it should.)
        first = min(revs)
        nullrev = node.nullrev
        if first == nullrev:
            # Are there nodes with a null first parent and a non-null
            # second one? Maybe. Do we care? Probably not.
            for rev in cl:
                yield rev
        else:
            descendants = set(revs)
            # Every descendant has a rev strictly above its parents, so a
            # single ascending scan from first+1 finds them all.
            for rev in cl.revs(first + 1):
                prevs = cl.parentrevs(rev)[:cut]
                if any(p != nullrev and p in descendants for p in prevs):
                    descendants.add(rev)
                    yield rev

    return generatorset(iterate(), iterasc=True)
87
88 def _reachablerootspure(repo, minroot, roots, heads, includepath):
89 """return (heads(::<roots> and ::<heads>))
90
91 If includepath is True, return (<roots>::<heads>)."""
92 if not roots:
93 return []
94 parentrevs = repo.changelog.parentrevs
95 roots = set(roots)
96 visit = list(heads)
97 reachable = set()
98 seen = {}
99 # prefetch all the things! (because python is slow)
100 reached = reachable.add
101 dovisit = visit.append
102 nextvisit = visit.pop
103 # open-code the post-order traversal due to the tiny size of
104 # sys.getrecursionlimit()
105 while visit:
106 rev = nextvisit()
107 if rev in roots:
108 reached(rev)
109 if not includepath:
110 continue
111 parents = parentrevs(rev)
112 seen[rev] = parents
113 for parent in parents:
114 if parent >= minroot and parent not in seen:
115 dovisit(parent)
116 if not reachable:
117 return baseset()
118 if not includepath:
119 return reachable
120 for rev in sorted(seen):
121 for parent in seen[rev]:
122 if parent in reachable:
123 reached(rev)
124 return reachable
125
def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    cl = repo.changelog
    minroot = roots.min()
    rootlist = list(roots)
    headlist = list(heads)
    try:
        # prefer the native changelog implementation when it exists
        found = cl.reachableroots(minroot, headlist, rootlist, includepath)
    except AttributeError:
        found = _reachablerootspure(repo, minroot, rootlist, headlist,
                                    includepath)
    result = baseset(found)
    result.sort()
    return result
142
def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    ``revs`` is sorted in place (descending) before iteration starts.
    ``parentsfunc(rev)`` must return the parent revisions of ``rev``;
    null parents (<= node.nullrev) are ignored.  ``firstbranch`` pre-seeds
    the set of "unblocked" revisions, prioritizing the branch containing
    those revisions (see comment on ``unblocked`` below).

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revisions already emitted
    # B) revisions in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we search for a subgroup in (B) awaiting REV to become
    #   available; if such a subgroup exists, we add REV to it and the
    #   subgroup is now awaiting REV.parents() to become available.
    #
    #   3) finally, if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revisions that have been emitted. They can be
    # considered unblocked as the graph generator is already aware of them so
    # there is no need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to
    # be emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    list of revs waiting to be displayed,
    #    blocked: set of revs that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') corresponds to parents of any revision in
    # the group ('revs') that is not itself contained in the group. The main
    # idea of this algorithm is to delay as much as possible the emission of
    # any revision.  This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this means we
    # properly retain new branches but give up on any special ordering for
    # ancestors of merges. The implementation can be improved to handle this
    # better.
    #
    # The first subgroup is special. It corresponds to all the revisions that
    # were already emitted. Its 'revs' list is expected to be empty and its
    # 'blocked' set contains the parent revisions of already emitted
    # revisions.
    #
    # You could pre-seed the <parents> set of groups[0] to specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending revs until after the current rev has been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroup. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            # queue the (non-null) parents for processing; they become the
            # revisions this subgroup is now blocked on
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given)
            # or we emitted a whole disconnected set of the graph (reached a
            # root). In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1: # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroups waiting for revisions we are not going
    # to iterate over (disconnected parts of the graph); flush them out.
    for g in groups:
        for r in g[0]:
            yield r