comparison mercurial/graphmod.py @ 29347:98535ad46fc0

revset: move groupbranchiter over from graphmod This move is to prepare the adaptation of this function into a toposort predicate.
author Martijn Pieters <mjpieters@fb.com>
date Mon, 13 Jun 2016 18:20:00 +0100
parents 09d0022cad83
children 2188f170f5b6
comparison
equal deleted inserted replaced
29346:38e0c83c7ee4 29347:98535ad46fc0
16 context of the graph returned. Type is a constant specifying the node type. 16 context of the graph returned. Type is a constant specifying the node type.
17 Data depends on type. 17 Data depends on type.
18 """ 18 """
19 19
20 from __future__ import absolute_import 20 from __future__ import absolute_import
21
22 import heapq
23 21
24 from .node import nullrev 22 from .node import nullrev
25 from . import ( 23 from . import (
26 revset, 24 revset,
27 util, 25 util,
35 # point. A number prefix means only the last N characters of the current block 33 # point. A number prefix means only the last N characters of the current block
36 # will use that style, the rest will use the PARENT style. Add a - sign 34 # will use that style, the rest will use the PARENT style. Add a - sign
37 # (so making N negative) and all but the first N characters use that style. 35 # (so making N negative) and all but the first N characters use that style.
38 EDGES = {PARENT: '|', GRANDPARENT: ':', MISSINGPARENT: None} 36 EDGES = {PARENT: '|', GRANDPARENT: ':', MISSINGPARENT: None}
39 37
40 def groupbranchiter(revs, parentsfunc, firstbranch=()):
41 """Yield revisions from heads to roots one (topo) branch at a time.
42
43 This function aims to be used by a graph generator that wishes to minimize
44 the number of parallel branches and their interleaving.
45
46 Example iteration order (numbers show the "true" order in a changelog):
47
48 o 4
49 |
50 o 1
51 |
52 | o 3
53 | |
54 | o 2
55 |/
56 o 0
57
58 Note that the ancestors of merges are understood by the current
59 algorithm to be on the same branch. This means no reordering will
60 occur behind a merge.
61 """
62
63 ### Quick summary of the algorithm
64 #
65 # This function is based around a "retention" principle. We keep revisions
66 # in memory until we are ready to emit a whole branch that immediately
67 # "merges" into an existing one. This reduces the number of parallel
68 # branches with interleaved revisions.
69 #
70 # During iteration revs are split into two groups:
71 # A) revision already emitted
72 # B) revision in "retention". They are stored as different subgroups.
73 #
74 # for each REV, we do the following logic:
75 #
76 # 1) if REV is a parent of (A), we will emit it. If there is a
77 # retention group ((B) above) that is blocked on REV being
78 # available, we emit all the revisions out of that retention
79 # group first.
80 #
81 # 2) else, we'll search for a subgroup in (B) awaiting for REV to be
82 # available, if such subgroup exist, we add REV to it and the subgroup is
83 # now awaiting for REV.parents() to be available.
84 #
85 # 3) finally if no such group existed in (B), we create a new subgroup.
86 #
87 #
88 # To bootstrap the algorithm, we emit the tipmost revision (which
89 # puts it in group (A) from above).
90
91 revs.sort(reverse=True)
92
93 # Set of parents of revision that have been emitted. They can be considered
94 # unblocked as the graph generator is already aware of them so there is no
95 # need to delay the revisions that reference them.
96 #
97 # If someone wants to prioritize a branch over the others, pre-filling this
98 # set will force all other branches to wait until this branch is ready to be
99 # emitted.
100 unblocked = set(firstbranch)
101
102 # list of groups waiting to be displayed, each group is defined by:
103 #
104 # (revs: lists of revs waiting to be displayed,
105 # blocked: set of that cannot be displayed before those in 'revs')
106 #
107 # The second value ('blocked') correspond to parents of any revision in the
108 # group ('revs') that is not itself contained in the group. The main idea
109 # of this algorithm is to delay as much as possible the emission of any
110 # revision. This means waiting for the moment we are about to display
111 # these parents to display the revs in a group.
112 #
113 # This first implementation is smart until it encounters a merge: it will
114 # emit revs as soon as any parent is about to be emitted and can grow an
115 # arbitrary number of revs in 'blocked'. In practice this mean we properly
116 # retains new branches but gives up on any special ordering for ancestors
117 # of merges. The implementation can be improved to handle this better.
118 #
119 # The first subgroup is special. It corresponds to all the revision that
120 # were already emitted. The 'revs' lists is expected to be empty and the
121 # 'blocked' set contains the parents revisions of already emitted revision.
122 #
123 # You could pre-seed the <parents> set of groups[0] to a specific
124 # changesets to select what the first emitted branch should be.
125 groups = [([], unblocked)]
126 pendingheap = []
127 pendingset = set()
128
129 heapq.heapify(pendingheap)
130 heappop = heapq.heappop
131 heappush = heapq.heappush
132 for currentrev in revs:
133 # Heap works with smallest element, we want highest so we invert
134 if currentrev not in pendingset:
135 heappush(pendingheap, -currentrev)
136 pendingset.add(currentrev)
137 # iterates on pending rev until after the current rev have been
138 # processed.
139 rev = None
140 while rev != currentrev:
141 rev = -heappop(pendingheap)
142 pendingset.remove(rev)
143
144 # Seek for a subgroup blocked, waiting for the current revision.
145 matching = [i for i, g in enumerate(groups) if rev in g[1]]
146
147 if matching:
148 # The main idea is to gather together all sets that are blocked
149 # on the same revision.
150 #
151 # Groups are merged when a common blocking ancestor is
152 # observed. For example, given two groups:
153 #
154 # revs [5, 4] waiting for 1
155 # revs [3, 2] waiting for 1
156 #
157 # These two groups will be merged when we process
158 # 1. In theory, we could have merged the groups when
159 # we added 2 to the group it is now in (we could have
160 # noticed the groups were both blocked on 1 then), but
161 # the way it works now makes the algorithm simpler.
162 #
163 # We also always keep the oldest subgroup first. We can
164 # probably improve the behavior by having the longest set
165 # first. That way, graph algorithms could minimise the length
166 # of parallel lines their drawing. This is currently not done.
167 targetidx = matching.pop(0)
168 trevs, tparents = groups[targetidx]
169 for i in matching:
170 gr = groups[i]
171 trevs.extend(gr[0])
172 tparents |= gr[1]
173 # delete all merged subgroups (except the one we kept)
174 # (starting from the last subgroup for performance and
175 # sanity reasons)
176 for i in reversed(matching):
177 del groups[i]
178 else:
179 # This is a new head. We create a new subgroup for it.
180 targetidx = len(groups)
181 groups.append(([], set([rev])))
182
183 gr = groups[targetidx]
184
185 # We now add the current nodes to this subgroups. This is done
186 # after the subgroup merging because all elements from a subgroup
187 # that relied on this rev must precede it.
188 #
189 # we also update the <parents> set to include the parents of the
190 # new nodes.
191 if rev == currentrev: # only display stuff in rev
192 gr[0].append(rev)
193 gr[1].remove(rev)
194 parents = [p for p in parentsfunc(rev) if p > nullrev]
195 gr[1].update(parents)
196 for p in parents:
197 if p not in pendingset:
198 pendingset.add(p)
199 heappush(pendingheap, -p)
200
201 # Look for a subgroup to display
202 #
203 # When unblocked is empty (if clause), we were not waiting for any
204 # revisions during the first iteration (if no priority was given) or
205 # if we emitted a whole disconnected set of the graph (reached a
206 # root). In that case we arbitrarily take the oldest known
207 # subgroup. The heuristic could probably be better.
208 #
209 # Otherwise (elif clause) if the subgroup is blocked on
210 # a revision we just emitted, we can safely emit it as
211 # well.
212 if not unblocked:
213 if len(groups) > 1: # display other subset
214 targetidx = 1
215 gr = groups[1]
216 elif not gr[1] & unblocked:
217 gr = None
218
219 if gr is not None:
220 # update the set of awaited revisions with the one from the
221 # subgroup
222 unblocked |= gr[1]
223 # output all revisions in the subgroup
224 for r in gr[0]:
225 yield r
226 # delete the subgroup that you just output
227 # unless it is groups[0] in which case you just empty it.
228 if targetidx:
229 del groups[targetidx]
230 else:
231 gr[0][:] = []
232 # Check if we have some subgroup waiting for revisions we are not going to
233 # iterate over
234 for g in groups:
235 for r in g[0]:
236 yield r
237
238 def dagwalker(repo, revs): 38 def dagwalker(repo, revs):
239 """cset DAG generator yielding (id, CHANGESET, ctx, [parentinfo]) tuples 39 """cset DAG generator yielding (id, CHANGESET, ctx, [parentinfo]) tuples
240 40
241 This generator function walks through revisions (which should be ordered 41 This generator function walks through revisions (which should be ordered
242 from bigger to lower). It returns a tuple for each node. 42 from bigger to lower). It returns a tuple for each node.
257 firstbranchrevset = repo.ui.config( 57 firstbranchrevset = repo.ui.config(
258 'experimental', 'graph-group-branches.firstbranch', '') 58 'experimental', 'graph-group-branches.firstbranch', '')
259 if firstbranchrevset: 59 if firstbranchrevset:
260 firstbranch = repo.revs(firstbranchrevset) 60 firstbranch = repo.revs(firstbranchrevset)
261 parentrevs = repo.changelog.parentrevs 61 parentrevs = repo.changelog.parentrevs
262 revs = groupbranchiter(revs, parentrevs, firstbranch) 62 revs = revset.groupbranchiter(revs, parentrevs, firstbranch)
263 revs = revset.baseset(revs) 63 revs = revset.baseset(revs)
264 64
265 for rev in revs: 65 for rev in revs:
266 ctx = repo[rev] 66 ctx = repo[rev]
267 # partition into parents in the rev set and missing parents, then 67 # partition into parents in the rev set and missing parents, then