Mercurial > evolve
view hgext3rd/evolve/stablerange.py @ 2231:f872738bb5b3
stablerange: add a proper debugstablerange commands
This commands allows to inspect the standard stable range of a range. That
should come handy.
author | Pierre-Yves David <pierre-yves.david@ens-lyon.org> |
---|---|
date | Fri, 24 Mar 2017 10:12:02 +0100 |
parents | 4b621b56e3a1 |
children | 6b95bcc402fe |
line wrap: on
line source
# Code dedicated to the computation and properties of "stable ranges" # # These stable ranges are use for obsolescence markers discovery # # Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. import collections import heapq import math from mercurial import ( commands, cmdutil, localrepo, node as nodemod, scmutil, ) from mercurial.i18n import _ from . import ( exthelper, ) eh = exthelper.exthelper() ################################## ### Stable topological sorting ### ################################## @eh.command( 'debugstablesort', [ ('', 'rev', [], 'heads to start from'), ] + commands.formatteropts, _('')) def debugstablesort(ui, repo, **opts): """display the ::REVS set topologically sorted in a stable way """ revs = scmutil.revrange(repo, opts['rev']) displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True) for r in stablesort(repo, revs): ctx = repo[r] displayer.show(ctx) displayer.flush(ctx) displayer.close() def stablesort(repo, revs, mergecallback=None): """return '::revs' topologically sorted in "stable" order This is a depth first traversal starting from 'nullrev', using node as a tie breaker. """ # Various notes: # # * Bitbucket is used dates as tie breaker, that might be a good idea. # # * It seemds we can traverse in the same order from (one) head to bottom, # if we the following record data for each merge: # # - highest (stablesort-wise) common ancestors, # - order of parents (tablesort-wise) cl = repo.changelog parents = cl.parentrevs nullrev = nodemod.nullrev n = cl.node # step 1: We need a parents -> children mapping for 2 reasons. # # * we build the order from nullrev to tip # # * we need to detect branching children = collections.defaultdict(list) for r in cl.ancestors(revs, inclusive=True): p1, p2 = parents(r) children[p1].append(r) if p2 != nullrev: children[p2].append(r) # step two: walk back up # * pick lowest node in case of branching # * stack disregarded part of the branching # * process merge when both parents are yielded # track what changeset has been seen = [0] * (max(revs) + 2) seen[-1] = True # nullrev is known # starts from repository roots # reuse the list form the mapping as we won't need it again anyway stack = children[nullrev] if not stack: return [] if 1 < len(stack): stack.sort(key=n, reverse=True) # list of rev, maybe we should yield, but since we built a children mapping we are 'O(N)' already result = [] current = stack.pop() while current is not None or stack: if current is None: # previous iteration reached a merge or an unready merge, current = stack.pop() if seen[current]: current = None continue p1, p2 = parents(current) if not (seen[p1] and seen[p2]): # we can't iterate on this merge yet because other child is not # yielded yet (and we are topo sorting) we can discard it for now # because it will be reached from the other child. current = None continue assert not seen[current] seen[current] = True result.append(current) # could be yield, cf earlier comment if mergecallback is not None and p2 != nullrev: mergecallback(result, current) cs = children[current] if not cs: current = None elif 1 == len(cs): current = cs[0] else: cs.sort(key=n, reverse=True) current = cs.pop() # proceed on smallest stack.extend(cs) # stack the rest for later assert len(result) == len(set(result)) return result ################################# ### Stable Range computation ### ################################# def _hlp2(i): """return highest power of two lower than 'i'""" return 2 ** int(math.log(i - 1, 2)) def subrangesclosure(repo, heads): """set of all standard subrange under heads This is intended for debug purposes. Range are returned from largest to smallest in terms of number of revision it contains.""" subranges = repo.stablerange.subranges toproceed = [(r, 0, ) for r in heads] ranges = set(toproceed) while toproceed: entry = toproceed.pop() for r in subranges(repo, entry): if r not in ranges: ranges.add(r) toproceed.append(r) ranges = list(ranges) n = repo.changelog.node rangelength = repo.stablerange.rangelength ranges.sort(key=lambda r: (-rangelength(repo, r), n(r[0]))) return ranges @eh.command( 'debugstablerange', [ ('', 'rev', [], 'operate on (rev, 0) ranges for rev in REVS'), ('', 'subranges', False, 'recursively display data for subranges too'), ], _('')) def debugstablerange(ui, repo, **opts): """display standard stable subrange for a set of ranges Range as displayed as '<node>-<index> (<rev>, <depth>, <length>)', use --verbose to get the extra details in (). """ short = nodemod.short revs = scmutil.revrange(repo, opts['rev']) # prewarm depth cache unfi = repo.unfiltered() node = unfi.changelog.node stablerange = unfi.stablerange depth = stablerange.depthrev length = stablerange.rangelength subranges = stablerange.subranges repo.stablerange.warmup(repo, revs) if opts['subranges']: ranges = subrangesclosure(repo, revs) else: ranges = [(r, 0) for r in revs] if ui.verbose: template = '%s-%d (%d, %d, %d)' def _rangestring(repo, rangeid): return template % ( short(node(rangeid[0])), rangeid[1], rangeid[0], depth(unfi, rangeid[0]), length(unfi, rangeid) ) else: template = '%s-%d' def _rangestring(repo, rangeid): return template % ( short(node(rangeid[0])), rangeid[1], ) for r in ranges: subs = subranges(unfi, r) subsstr = ', '.join(_rangestring(unfi, s) for s in subs) rstr = _rangestring(unfi, r) ui.status('%s - %s\n' % (rstr, subsstr)) class stablerange(object): def __init__(self): self._depthcache = {} self._subrangescache = {} # To slices merge, we need to walk their descendant in reverse stable # sort order. For now we perform a full stable sort their descendant # and then use the relevant top most part. This order is going to be # the same for all ranges headed at the same merge. So we cache these # value to reuse them accross the same invocation. self._stablesortcache = {} # something useful to compute the above # mergerev -> stablesort, length self._stablesortprepared = {} # caching parent call # as we do so many of them self._parentscache = {} # The first part of the stable sorted list of revision of a merge will # shared with the one of others. This means we can reuse subranges # computed from that point to compute some of the subranges from the # merge. self._inheritancecache = {} def warmup(self, repo, heads): """warm the cache up to 'heads'""" repo = repo.unfiltered() # subrange should be warmed from head to range to be able to benefit # from revsfromrange cache. otherwise each merge will trigger its own # stablesort. # # we use the revnumber as an approximation for depth ui = repo.ui revs = repo.revs("::%ld", heads) nbrevs = len(revs) rangeheap = [] for idx, r in enumerate(revs): if not idx % 1000: ui.progress(_("filling depth cache"), idx, total=nbrevs) # warm up depth self.depthrev(repo, r) rangeheap.append((-r, (r, 0))) ui.progress(_("filling depth cache"), None, total=nbrevs) heappop = heapq.heappop heappush = heapq.heappush heapify = heapq.heapify original = set(rangeheap) seen = 0 heapify(rangeheap) while rangeheap: value = heappop(rangeheap) if value in original: if not seen % 1000: ui.progress(_("filling stablerange cache"), seen, total=nbrevs) seen += 1 original.remove(value) # might have been added from other source __, rangeid = value if self._getsub(rangeid) is None: for sub in self.subranges(repo, rangeid): if self._getsub(sub) is None: heappush(rangeheap, (-sub[0], sub)) ui.progress(_("filling stablerange cache"), None, total=nbrevs) def depthrev(self, repo, rev): repo = repo.unfiltered() cl = repo.changelog depth = self._getdepth nullrev = nodemod.nullrev stack = [rev] while stack: revdepth = None current = stack[-1] revdepth = depth(current) if revdepth is not None: stack.pop() continue p1, p2 = self._parents(current, cl.parentrevs) if p1 == nullrev: # root case revdepth = 1 elif p2 == nullrev: # linear commit case parentdepth = depth(p1) if parentdepth is None: stack.append(p1) else: revdepth = parentdepth + 1 else: # merge case revdepth = self._depthmerge(cl, current, p1, p2, stack) if revdepth is not None: self._setdepth(current, revdepth) stack.pop() # actual_depth = len(list(cl.ancestors([rev], inclusive=True))) # assert revdepth == actual_depth, (rev, revdepth, actual_depth) return revdepth def rangelength(self, repo, rangeid): headrev, index = rangeid[0], rangeid[1] return self.depthrev(repo, headrev) - index def subranges(self, repo, rangeid): cached = self._getsub(rangeid) if cached is not None: return cached value = self._subranges(repo, rangeid) self._setsub(rangeid, value) return value def revsfromrange(self, repo, rangeid): headrev, index = rangeid rangelength = self.rangelength(repo, rangeid) if rangelength == 1: revs = [headrev] else: # get all revs under heads in stable order # # note: In the general case we can just walk down and then request # data about the merge. But I'm not sure this function will be even # call for the general case. allrevs = self._stablesortcache.get(headrev) if allrevs is None: allrevs = self._getrevsfrommerge(repo, headrev) if allrevs is None: allrevs = stablesort(repo, [headrev], mergecallback=self._filestablesortcache) self._stablesortcache[headrev] = allrevs # takes from index revs = allrevs[index:] # sanity checks assert len(revs) == rangelength return revs def _parents(self, rev, func): parents = self._parentscache.get(rev) if parents is None: parents = func(rev) self._parentscache[rev] = parents return parents def _getdepth(self, rev): """utility function used to access the depth cache This mostly exist to help the on disk persistence.""" return self._depthcache.get(rev) def _setdepth(self, rev, value): """utility function used to set the depth cache This mostly exist to help the on disk persistence.""" self._depthcache[rev] = value def _getsub(self, rev): """utility function used to access the subranges cache This mostly exist to help the on disk persistence""" return self._subrangescache.get(rev) def _setsub(self, rev, value): """utility function used to set the subranges cache This mostly exist to help the on disk persistence.""" self._subrangescache[rev] = value def _filestablesortcache(self, sortedrevs, merge): if merge not in self._stablesortprepared: self._stablesortprepared[merge] = (sortedrevs, len(sortedrevs)) def _getrevsfrommerge(self, repo, merge): prepared = self._stablesortprepared.get(merge) if prepared is None: return None mergedepth = self.depthrev(repo, merge) allrevs = prepared[0][:prepared[1]] nbextrarevs = prepared[1] - mergedepth if not nbextrarevs: return allrevs anc = repo.changelog.ancestors([merge], inclusive=True) top = [] counter = nbextrarevs for rev in reversed(allrevs): if rev in anc: top.append(rev) else: counter -= 1 if counter <= 0: break bottomidx = prepared[1] - (nbextrarevs + len(top)) revs = allrevs[:bottomidx] revs.extend(reversed(top)) return revs def _inheritancepoint(self, repo, merge): """Find the inheritance point of a Merge The first part of the stable sorted list of revision of a merge will shared with the one of others. This means we can reuse subranges computed from that point to compute some of the subranges from the merge. That point is latest point in the stable sorted list where the depth of the revisions match its index (that means all revision earlier in the stable sorted list are its ancestors, no dangling unrelated branches exists). """ value = self._inheritancecache.get(merge) if value is None: revs = self.revsfromrange(repo, (merge, 0)) i = reversed(revs) i.next() # pop the merge expected = len(revs) - 1 # Since we do warmup properly, we can expect the cache to be hot # for everythin under the merge we investigate cache = self._depthcache # note: we cannot do a binary search because element under the # inherited point might have mismatching depth because of inner # branching. for rev in i: if cache[rev] == expected: break expected -= 1 value = (expected - 1, rev) self._inheritancecache[merge] = value return value def _depthmerge(self, cl, rev, p1, p2, stack): # sub method to simplify the main 'depthrev' one revdepth = None depth = self._getdepth depth_p1 = depth(p1) depth_p2 = depth(p2) missingparent = False if depth_p1 is None: stack.append(p1) missingparent = True if depth_p2 is None: stack.append(p2) missingparent = True if missingparent: return None # computin depth of a merge # XXX the common ancestors heads could be cached ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2)) ancrevs = [cl.rev(a) for a in ancnodes] anyunkown = False ancdepth = [] for r in ancrevs: d = depth(r) if d is None: anyunkown = True stack.append(r) ancdepth.append((r, d)) if anyunkown: return None if not ancrevs: # unrelated branch, (no common root) revdepth = depth_p1 + depth_p2 + 1 elif len(ancrevs) == 1: # one unique branch point: # we can compute depth without any walk depth_anc = ancdepth[0][1] revdepth = depth_p1 + (depth_p2 - depth_anc) + 1 else: # multiple ancestors, we pick one that is # * the deepest (less changeset outside of it), # * lowest revs because more chance to have descendant of other "above" anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0])) revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev])) return revdepth def _subranges(self, repo, rangeid): if self.rangelength(repo, rangeid) == 1: return [] slicepoint = self._slicepoint(repo, rangeid) # make sure we have cache for all relevant parent first to prevent # recursion (python is bad with recursion stack = [] current = rangeid while current is not None: current = self._cold_reusable(repo, current, slicepoint) if current is not None: stack.append(current) while stack: # these call will directly compute the subranges self.subranges(repo, stack.pop()) return self._slicesrangeat(repo, rangeid, slicepoint) def _cold_reusable(self, repo, rangeid, slicepoint): """return parent range that it would be useful to prepare to slice rangeid at slicepoint This function also have the important task to update the revscache of the parent rev s if possible and needed""" p1, p2 = self._parents(rangeid[0], repo.changelog.parentrevs) if p2 == nodemod.nullrev: # regular changesets, we pick the parent reusablerev = p1 else: # merge, we try the inheritance point # if it is too low, it will be ditched by the depth check anyway index, reusablerev = self._inheritancepoint(repo, rangeid[0]) # if we reached the slicepoint, no need to go further if self.depthrev(repo, reusablerev) <= slicepoint: return None reurange = (reusablerev, rangeid[1]) # if we have an entry for the current range, lets update the cache # if we already have subrange for this range, no need to prepare it. if self._getsub(reurange) is not None: return None # look like we found a relevent parentrange with no cache yet return reurange def _slicepoint(self, repo, rangeid): rangedepth = self.depthrev(repo, rangeid[0]) step = _hlp2(rangedepth) standard_start = 0 while standard_start < rangeid[1] and 0 < step: if standard_start + step < rangedepth: standard_start += step step //= 2 if rangeid[1] == standard_start: slicesize = _hlp2(self.rangelength(repo, rangeid)) slicepoint = rangeid[1] + slicesize else: assert standard_start < rangedepth slicepoint = standard_start return slicepoint def _slicesrangeat(self, repo, rangeid, globalindex): p1, p2 = self._parents(rangeid[0], repo.changelog.parentrevs) if p2 == nodemod.nullrev: reuserev = p1 else: index, reuserev = self._inheritancepoint(repo, rangeid[0]) if index < globalindex: return self._slicesrangeatmerge(repo, rangeid, globalindex) assert reuserev != nodemod.nullrev reuserange = (reuserev, rangeid[1]) top = (rangeid[0], globalindex) if rangeid[1] + self.rangelength(repo, reuserange) == globalindex: return [reuserange, top] # This will not initiate a recursion since we took appropriate # precaution in the caller of this method to ensure it will be so. # It the parent is a merge that will not be the case but computing # subranges from a merge will not recurse. reusesubranges = self.subranges(repo, reuserange) slices = reusesubranges[:-1] # pop the top slices.append(top) return slices def _slicesrangeatmerge(self, repo, rangeid, globalindex): localindex = globalindex - rangeid[1] cl = repo.changelog result = [] allrevs = self.revsfromrange(repo, rangeid) bottomrevs = allrevs[:localindex] if globalindex == self.depthrev(repo, bottomrevs[-1]): # simple case, top revision in the bottom set contains exactly the # revision we needs result.append((bottomrevs[-1], rangeid[1])) else: parentrevs = cl.parentrevs parents = self._parents bheads = set(bottomrevs) du = bheads.difference_update reachableroots = repo.changelog.reachableroots minrev = min(bottomrevs) for r in bottomrevs: du(parents(r, parentrevs)) for h in bheads: # reachable roots is fast because is C # # It is worth noting that will use this kind of filtering from # "h" multiple time in a warming run. So using "ancestors" and # caching that should be faster. But python code filtering on # the ancestors end up being slower. hrevs = reachableroots(minrev, [h], bottomrevs, True) start = self.depthrev(repo, h) - len(hrevs) entry = (h, start) result.append(entry) # Talking about python code being slow, the following code is an # alternative implementation. # # It complexity is better since is does a single traversal on the # bottomset. However since it is all python it end up being # slower. # I'm keeping it here as an inspiration for a future C version # branches = [] # for current in reversed(bottomrevs): # ps = parents(current, parentrevs) # found = False # for brevs, bexpect in branches: # if current in bexpect: # found = True # brevs.append(current) # bexpect.discard(current) # bexpect.update(ps) # if not found: # branches.append(([current], set(ps))) # for revs, __ in reversed(branches): # head = revs[0] # index = self.depthrev(repo, head) - len(revs) # result.append((head, index)) # top part is trivial top = (rangeid[0], globalindex) result.append(top) return result @eh.reposetup def setupcache(ui, repo): class stablerangerepo(repo.__class__): @localrepo.unfilteredpropertycache def stablerange(self): return stablerange() repo.__class__ = stablerangerepo