hgext3rd/evolve/stablerange.py @ 2184:3ec0be20e365
stablerange: add a cache for stablesort ordering
This will be very handy for merging; see the inline documentation for details.
author:   Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date:     Wed, 22 Mar 2017 20:11:19 +0100
parents:  3c2992afee71
children: 6b98ceed0626
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence marker discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import math

from mercurial import (
    commands,
    cmdutil,
    localrepo,
    node as nodemod,
    scmutil,
    util,
)

from mercurial.i18n import _

from . import (
    exthelper,
)

eh = exthelper.exthelper()

##################################
### Stable topological sorting ###
##################################

@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for r in stablesort(repo, revs):
        ctx = repo[r]
        displayer.show(ctx)
        displayer.flush(ctx)
    displayer.close()

def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.
    """
    # Various notes:
    #
    # * Bitbucket used dates as tie breaker; that might be a good idea.
    #
    # * It seems we can traverse in the same order from (one) head to bottom,
    #   if we record the following data for each merge:
    #
    #   - highest (stablesort-wise) common ancestors,
    #   - order of parents (stablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step 2: walk back up
    # * pick lowest node in case of branching
    # * stack disregarded part of the branching
    # * process merge when both parents are yielded

    # track which changesets have been seen
    seen = [0] * (max(revs) + 2)
    seen[-1] = True  # nullrev is known
    # starts from repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)

    # list of revs, maybe we should yield, but since we built a children
    # mapping we are 'O(N)' already
    result = []

    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # previous iteration reached a merge or an unready merge,
            # pick the next stacked changeset
            current = stack.pop()
            if seen[current]:
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't iterate on this merge yet because the other parent is
            # not yielded yet (and we are topo sorting); we can discard it for
            # now because it will be reached from that other parent.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current)  # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            current = None
        elif 1 == len(cs):
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop()  # proceed on smallest
            stack.extend(cs)    # stack the rest for later
    assert len(result) == len(set(result))
    return result

#################################
### Stable Range computation ###
#################################

def subrangesclosure(repo, heads):
    """set of all standard subranges under heads

    This is intended for debug purposes. Ranges are returned from largest to
    smallest in terms of the number of revisions they contain."""
    subranges = repo.stablerange.subranges
    toproceed = [stablerange(repo, r, 0) for r in heads]
    ranges = set(toproceed)
    while toproceed:
        entry = toproceed.pop()
        for r in subranges(repo, entry):
            if r not in ranges:
                ranges.add(r)
                toproceed.append(r)
    ranges = list(ranges)
    n = repo.changelog.node
    rangelength = repo.stablerange.rangelength
    ranges.sort(key=lambda r: (-rangelength(repo, r), n(r[0])))
    return ranges

class stablerangecache(dict):

    def __init__(self):
        self._depthcache = {}
        self._subrangescache = {}
        # To merge slices, we need to walk their descendants in reverse stable
        # sort order. For now we perform a full stable sort of the descendants
        # and then use the relevant topmost part. This order is going to be
        # the same for all ranges headed at the same merge, so we cache these
        # values to reuse them across the same invocation.
        self._stablesortcache = {}

    def warmup(self, repo, heads):
        """warm the cache up to 'heads'"""
        for r in repo.revs("::%ld", heads):
            self.depthrev(repo, r)

    def depthrev(self, repo, rev):
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    def rangelength(self, repo, rangeid):
        headrev, index = rangeid[0], rangeid[1]
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            value = []
        else:
            slicepoint = self._slicepoint(repo, rangeid)
            value = self._slicesrangeat(repo, rangeid, slicepoint)
        self._subrangescache[rangeid] = value
        return value

    def revsfromrange(self, repo, rangeid):
        # get all revs under the head in stable order
        allrevs = self._stablesortcache.get(rangeid[0])
        if allrevs is None:
            allrevs = stablesort(repo, [rangeid[0]])
            self._stablesortcache[rangeid[0]] = allrevs
        # take from the index
        revs = allrevs[rangeid[1]:]

        # sanity check
        assert len(revs) == self.rangelength(repo, rangeid)
        return revs

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing the depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunknown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                anyunknown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunknown:
            return None
        if not ancrevs:
            # unrelated branches (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (fewer changesets outside of it),
            # * the lowest rev, because it has more chance to have descendants
            #   of the others "above"
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def _slicepoint(self, repo, rangeid):
        rangedepth = self.depthrev(repo, rangeid[0])
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid[1] and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid[1] == standard_start:
            slicesize = _hlp2(self.rangelength(repo, rangeid))
            slicepoint = rangeid[1] + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        p1, p2 = repo.changelog.parentrevs(rangeid[0])
        if p2 != nodemod.nullrev:
            return self._slicesrangeatmerge(repo, rangeid, globalindex)
        assert p1 != nodemod.nullrev
        rangedepth = self.depthrev(repo, rangeid[0])
        topsize = rangedepth - globalindex
        parentrange = stablerange(repo, p1, rangeid[1], rangeid._revs[:-1])
        if topsize == 1:
            top = stablerange(repo, rangeid[0], globalindex, [rangeid[0]])
            return [parentrange, top]
        else:
            # XXX recursive call, python has issues with them.
            # The best way to break it would be to do it directly in
            # 'self.subranges'. In that other method we could make sure the
            # subranges for (p1(rev), idx) are available before slicing
            # (rev, idx). But the heavy object approach makes it a bit
            # inconvenient so that will wait for that heavy object to be gone.
            parentsubranges = self.subranges(repo, parentrange)
            slices = parentsubranges[:-1]  # pop the top
            top = stablerange(repo, rangeid[0], globalindex,
                              rangeid._revs[-topsize:])
            slices.append(top)
            return slices

    def _slicesrangeatmerge(self, repo, rangeid, globalindex):
        localindex = globalindex - rangeid[1]
        cl = repo.changelog
        result = []
        bottom = rangeid._revs[:localindex]
        top = stablerange(repo, rangeid[0], globalindex,
                          rangeid._revs[localindex:])
        rangedepth = repo.stablerange.depthrev(repo, rangeid[0])
        toprootdepth = repo.stablerange.depthrev(repo, top._revs[0])
        if toprootdepth + self.rangelength(repo, top) == rangedepth + 1:
            bheads = [bottom[-1]]
        else:
            bheads = set(bottom)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottom:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottom[-1]
            bottomdepth = repo.stablerange.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottom)
            result.append(stablerange(repo, newhead, newstart, bottom))
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            cl = repo.changelog
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottom if r in subset]
                start = repo.stablerange.depthrev(repo, h) - len(hrevs)
                entry = stablerange(repo, h, start,
                                    [r for r in bottom if r in subset])
                result.append(entry)
        result.append(top)
        return result

def _hlp2(i):
    """return the highest power of two strictly lower than 'i'"""
    return 2 ** int(math.log(i - 1, 2))

class stablerange(object):

    def __init__(self, repo, head, index, revs=None):
        self._repo = repo.unfiltered()
        self._head = head
        self._index = index
        if revs is not None:
            length = self._repo.stablerange.rangelength(self._repo, self)
            assert len(revs) == length
            self._revs = revs
        depth = self._repo.stablerange.depthrev(self._repo, self[0])
        assert index < depth, (head, index, depth, revs)

    def __hash__(self):
        return hash((self._head, self._index))

    def __eq__(self, other):
        if type(self) != type(other):
            raise NotImplementedError()
        return (self._head, self._index) == (other._head, other._index)

    def __getitem__(self, idx):
        """small helper function to prepare for the migration to tuple"""
        if idx == 0:
            return self._head
        elif idx == 1:
            return self._index
        else:
            raise IndexError(idx)

    @util.propertycache
    def _revs(self):
        return self._repo.stablerange.revsfromrange(self._repo, self)

@eh.reposetup
def setupcache(ui, repo):

    class stablerangerepo(repo.__class__):

        @localrepo.unfilteredpropertycache
        def stablerange(self):
            return stablerangecache()

    repo.__class__ = stablerangerepo
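
A minimal usage sketch of the stablesort cache added by this changeset, not part of the file above: it assumes a local repository with the evolve extension enabled, so that the 'setupcache' reposetup shown above has installed 'repo.stablerange'. Plain (head, index) tuples stand in for stablerange objects here, which works because 'revsfromrange' only accesses items 0 and 1 of its rangeid argument.

# usage sketch only, assuming the evolve extension is enabled so that
# 'repo.stablerange' is provided by the 'setupcache' reposetup above,
# and that the repository is not empty
from mercurial import hg, ui as uimod

ui = uimod.ui()
repo = hg.repository(ui, '.')         # repository in the current directory

cache = repo.stablerange              # a stablerangecache instance
heads = list(repo.revs('heads(all())'))
cache.warmup(repo, heads)             # precompute depths for '::heads'

head = max(heads)
# two ranges sharing the same head reuse the same cached stable sort: the
# full ordering is computed on the first call and merely sliced on the second
full = cache.revsfromrange(repo, (head, 0))
tail = cache.revsfromrange(repo, (head, len(full) // 2))
assert tail == full[len(full) // 2:]

The same ordering can be inspected from the command line with the 'debugstablesort' command registered at the top of the file, e.g. 'hg debugstablesort --rev tip'.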