# HG changeset patch # User Pierre-Yves David # Date 1409847572 -7200 # Node ID f7ce0837eefdd08853a0807bdf58b60975c346d2 # Parent 1149920416256c6ad689de23d6e1acab28e7fdb6 graphmod: add a function for topological iteration This changeset introduces a function to perform topological (one branch after the other) iteration over a set of changesets. This first version has a lot of limitations, but the approach should be flexible enough to allow many improvements in the future. This changeset aims to set the first stone more than providing a complete solution. The algorithm does not need to know the whole set of nodes involved before emitting revision. This makes it a good candidate for usage in place like `hg log` or graphical tools that need a fast first result time. diff -r 114992041625 -r f7ce0837eefd mercurial/graphmod.py --- a/mercurial/graphmod.py Fri Oct 17 15:27:12 2014 -0700 +++ b/mercurial/graphmod.py Thu Sep 04 18:19:32 2014 +0200 @@ -22,6 +22,168 @@ CHANGESET = 'C' +def groupbranchiter(revs, parentsfunc): + """yield revision from heads to roots one (topo) branch after the other. + + This function aims to be used by a graph generator that wishes to minimize + the amount of parallel branches and their interleaving. + + Example iteration order: + + o 4 + | + o 1 + | + | o 3 + | | + | o 2 + |/ + o 0 + + Currently does not handle non-contiguous input. + + Currently consider every changeset under a merge to be on the same branch + using revision number to sort them. + + Could be easily extend to give priority to an initial branch.""" + ### Quick summary of the algorithm + # + # This function is based around a "retention" principle. We keep revisions + # in memory until we are ready to emit a whole branch that immediately + # "merge" into an existing one. This reduce the number of branch "ongoing" + # at the same time. + # + # During iteration revs are split into two groups: + # A) revision already emitted + # B) revision in "retention". They are stored as different subgroups. + # + # for each REV, we do the follow logic: + # + # a) if REV is a parent of (A), we will emit it. But before emitting it, + # we'll "free" all the revs from subgroup in (B) that were waiting for + # REV to be available. So we emit all revision of such subgroup before + # emitting REV + # + # b) else, we'll search for a subgroup in (B) awaiting for REV to be + # available, if such subgroup exist, we add REV to it and the subgroup is + # now awaiting for REV.parents() to be available. + # + # c) finally if no such group existed in (B), we create a new subgroup. + # + # + # To bootstrap the algorithm, we emit the tipmost revision. + + revs.sort(reverse=True) + + # Set of parents of revision that have been yield. They can be considered + # unblocked as the graph generator is already aware of them so there is no + # need to delay the one that reference them. + unblocked = set() + + # list of group waiting to be displayed, each group is defined by: + # + # (revs: lists of revs waiting to be displayed, + # blocked: set of that cannot be displayed before those in 'revs') + # + # The second value ('blocked') correspond to parents of any revision in the + # group ('revs') that is not itself contained in the group. The main idea + # of this algorithm is to delay as much as possible the emission of any + # revision. This means waiting for the moment we are about to display + # theses parents to display the revs in a group. + # + # This first implementation is smart until it meet a merge: it will emit + # revs as soon as any parents is about to be emitted and can grow an + # arbitrary number of revs in 'blocked'. In practice this mean we properly + # retains new branches but give up on any special ordering for ancestors of + # merges. The implementation can be improved to handle this better. + # + # The first subgroup is special. It correspond to all the revision that + # were already emitted. The 'revs' lists is expected to be empty and the + # 'blocked' set contains the parents revisions of already emitted revision. + # + # You could pre-seed the set of groups[0] to a specific + # changesets to select what the first emitted branch should be. + # + # We do not support revisions will hole yet, but adding such support would + # be easy. The iteration will have to be done using both input revision and + # parents (see cl.ancestors function + a few tweaks) but only revisions + # parts of the initial set should be emitted. + groups = [([], unblocked)] + for current in revs: + # Look for a subgroup blocked, waiting for the current revision. + matching = [i for i, g in enumerate(groups) if current in g[1]] + + if matching: + # The main idea is to gather together all sets that await on the + # same revision. + # + # This merging is done at the time we are about to add this common + # awaited to the subgroup for simplicity purpose. Such merge could + # happen sooner when we update the "blocked" set of revision. + # + # We also always keep the oldest subgroup first. We can probably + # improve the behavior by having the longuest set first. That way, + # graph algorythms could minimise the length of parallele lines + # their draw. This is currently not done. + targetidx = matching.pop(0) + trevs, tparents = groups[targetidx] + for i in matching: + gr = groups[i] + trevs.extend(gr[0]) + tparents |= gr[1] + # delete all merged subgroups (but the one we keep) + # (starting from the last subgroup for performance and sanity reason) + for i in reversed(matching): + del groups[i] + else: + # This is a new head. We create a new subgroup for it. + targetidx = len(groups) + groups.append(([], set([current]))) + + gr = groups[targetidx] + + # We now adds the current nodes to this subgroups. This is done after + # the subgroup merging because all elements from a subgroup that relied + # on this rev must preceed it. + # + # we also update the set to includes the parents on the + # new nodes. + gr[0].append(current) + gr[1].remove(current) + gr[1].update([p for p in parentsfunc(current) if p > nullrev]) + + # Look for a subgroup to display + # + # When unblocked is empty (if clause), We are not waiting over any + # revision during the first iteration (if no priority was given) or if + # we outputed a whole disconnected sets of the graph (reached a root). + # In that case we arbitrarily takes the oldest known subgroup. The + # heuristique could probably be better. + # + # Otherwise (elif clause) this mean we have some emitted revision. if + # the subgroup awaits on the same revision that the outputed ones, we + # can safely output it. + if not unblocked: + if len(groups) > 1: # display other subset + targetidx = 1 + gr = groups[1] + elif not gr[1] & unblocked: + gr = None + + if gr is not None: + # update the set of awaited revisions with the one from the + # subgroup + unblocked |= gr[1] + # output all revisions in the subgroup + for r in gr[0]: + yield r + # delete the subgroup that you just output + # unless it is groups[0] in which case you just empty it. + if targetidx: + del groups[targetidx] + else: + gr[0][:] = [] + def dagwalker(repo, revs): """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples