view hgext3rd/evolve/stablerange.py @ 2132:d77262807411
stablerange: move standard slice point definition in main class
More migration of code away from the doomed individual class.
author    Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date      Sun, 19 Mar 2017 04:43:33 +0100
parents   86dd39478638
children  323480863248
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence markers discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import math
import hashlib

from mercurial import (
    commands,
    cmdutil,
    localrepo,
    node as nodemod,
    scmutil,
    util,
)

from mercurial.i18n import _

from . import (
    exthelper,
    obsolete,
)

eh = exthelper.exthelper()

##################################
### Stable topological sorting ###
##################################

@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for r in stablesort(repo, revs):
        ctx = repo[r]
        displayer.show(ctx)
        displayer.flush(ctx)
    displayer.close()

def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.
    """
    # Various notes:
    #
    # * Bitbucket used dates as tie breaker; that might be a good idea.
    #
    # * It seems we can traverse in the same order from (one) head to bottom,
    #   if we record the following data for each merge:
    #
    #   - highest (stablesort-wise) common ancestors,
    #   - order of parents (stablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step 2: walk back up
    # * pick the lowest node in case of branching
    # * stack the disregarded part of the branching
    # * process a merge only when both parents have been yielded

    # track which changesets have been seen
    seen = [0] * (max(revs) + 2)
    seen[-1] = True  # nullrev is known
    # start from the repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)

    # list of revs; maybe we should yield, but since we built a children
    # mapping we are 'O(N)' already
    result = []

    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # previous iteration reached a merge or an unready merge
            current = stack.pop()
            if seen[current]:
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't iterate on this merge yet because the other parent is
            # not yielded yet (and we are topo sorting); we can discard it for
            # now because it will be reached from the other parent.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current)  # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            current = None
        elif 1 == len(cs):
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop()  # proceed on smallest
            stack.extend(cs)    # stack the rest for later
    assert len(result) == len(set(result))
    return result
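# Illustrative sketch (not part of the extension's own code): a quick way to
# check the topological guarantee stablesort() gives. It assumes an existing
# `repo` object; any head works in place of 'tip'. Every revision must appear
# after both of its parents:
#
#     order = stablesort(repo, scmutil.revrange(repo, ['tip']))
#     pos = {r: i for i, r in enumerate(order)}
#     for r in order:
#         for p in repo.changelog.parentrevs(r):
#             assert p == nodemod.nullrev or pos[p] < pos[r]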
#################################
### Stable Range computation ###
#################################

class stablerangecache(dict):

    def __init__(self):
        self._depthcache = {}
        self._subrangescache = {}

    def depthrev(self, repo, rev):
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunkown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                anyunkown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunkown:
            return None
        if not ancrevs:
            # unrelated branches (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (fewer changesets outside of it),
            # * the lowest rev, because it has more chance to have
            #   descendants of the others "above" it
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def rangelength(self, repo, rangeid):
        headrev, index = rangeid.head, rangeid.index
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            value = []
            self._subrangescache[rangeid] = value
            return value
        return None

    def setsubranges(self, rangeid, value):
        # XXX temporary cache setter as the value computation is performed
        # outside this class reach.
        self._subrangescache[rangeid] = value
        return self._subrangescache.get(rangeid)

    def _slicepoint(self, repo, rangeid):
        rangedepth = self.depthrev(repo, rangeid.head)
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid.index and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid.index == standard_start:
            slicesize = _hlp2(len(rangeid))
            slicepoint = rangeid.index + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        localindex = globalindex - rangeid.index

        cl = repo.changelog

        result = []
        bottom = rangeid._revs[:localindex]
        top = stablerange(repo, rangeid.head, globalindex,
                          rangeid._revs[localindex:])
        #
        toprootdepth = repo.stablerange.depthrev(repo, top._revs[0])
        if toprootdepth + len(top) == rangeid.depth + 1:
            bheads = [bottom[-1]]
        else:
            bheads = set(bottom)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottom:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottom[-1]
            bottomdepth = repo.stablerange.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottom)
            result.append(stablerange(repo, newhead, newstart, bottom))
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            cl = repo.changelog
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottom if r in subset]
                start = repo.stablerange.depthrev(repo, h) - len(hrevs)
                entry = stablerange(repo, h, start,
                                    [r for r in bottom if r in subset])
                result.append(entry)
        result.append(top)
        return result
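# Worked example (illustration only, using the helper defined just below):
# for a range whose head has depth 10 and whose index is 0,
#
#     _hlp2(10) == 2 ** int(math.log(9, 2)) == 8
#
# so _slicepoint() returns 8 and the 10-revision range is cut into a bottom
# slice of 8 revisions and a top slice of 2 revisions. Cutting at such
# power-of-two boundaries keeps the slicing deterministic, which is what the
# discovery code relies on.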
def _hlp2(i):
    """return highest power of two lower than 'i'"""
    return 2 ** int(math.log(i - 1, 2))

class stablerange(object):

    def __init__(self, repo, head, index, revs=None):
        self._repo = repo.unfiltered()
        self.head = head
        self.index = index
        if revs is not None:
            assert len(revs) == len(self)
            self._revs = revs
        assert index < self.depth, (head, index, self.depth, revs)

    def __repr__(self):
        return '%s %d %d %s' % (nodemod.short(self.node), self.depth,
                                self.index, nodemod.short(self.obshash))

    def __hash__(self):
        return self._id

    def __eq__(self, other):
        if type(self) != type(other):
            raise NotImplementedError()
        return self.stablekey == other.stablekey

    @util.propertycache
    def _id(self):
        return hash(self.stablekey)

    @util.propertycache
    def stablekey(self):
        return (self.node, self.index)

    @util.propertycache
    def node(self):
        return self._repo.changelog.node(self.head)

    def __len__(self):
        return self.depth - self.index

    @util.propertycache
    def depth(self):
        return self._repo.stablerange.depthrev(self._repo, self.head)

    @util.propertycache
    def _revs(self):
        r = stablesort(self._repo, [self.head])[self.index:]
        assert len(r) == len(self), (self.head, self.index, len(r), len(self))
        return r

    def subranges(self):
        cache = self._repo.stablerange
        cached = cache.subranges(self._repo, self)
        if cached is not None:
            return cached
        slicepoint = cache._slicepoint(self._repo, self)
        result = cache._slicesrangeat(self._repo, self, slicepoint)
        self._repo.stablerange.setsubranges(self, result)
        return result

    @util.propertycache
    def obshash(self):
        cache = self._repo.obsstore.rangeobshashcache
        obshash = cache.get(self)
        if obshash is not None:
            return obshash
        pieces = []
        nullid = nodemod.nullid
        if len(self) == 1:
            tmarkers = self._repo.obsstore.relevantmarkers([self.node])
            pieces = []
            for m in tmarkers:
                mbin = obsolete._fm1encodeonemarker(m)
                pieces.append(mbin)
            pieces.sort()
        else:
            for subrange in self.subranges():
                obshash = subrange.obshash
                if obshash != nullid:
                    pieces.append(obshash)

        sha = hashlib.sha1()
        # note: if there is only one subrange with actual data, we'll just
        # reuse the same hash.
        if not pieces:
            obshash = nodemod.nullid
        elif len(pieces) != 1 or obshash is None:
            sha = hashlib.sha1()
            for p in pieces:
                sha.update(p)
            obshash = cache[self] = sha.digest()
        return obshash
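# Usage sketch (hypothetical, assumes the reposetup below has run so that
# repo.stablerange exists): the range covering all of '::head' is built with
# index 0 and then recursively sliced; each subrange's obshash folds the
# obsolescence markers relevant to its slice of history.
#
#     rng = stablerange(repo, repo['tip'].rev(), 0)
#     for sub in rng.subranges():
#         ui.write('%r\n' % sub)  # repr shows node, depth, index, obshash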
@eh.reposetup
def setupcache(ui, repo):

    class stablerangerepo(repo.__class__):

        @localrepo.unfilteredpropertycache
        def stablerange(self):
            return stablerangecache()

    repo.__class__ = stablerangerepo
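# Command-line sketch (illustration only): the debugstablesort command
# registered above prints '::REVS' in stable order through the regular
# changeset templater, e.g.:
#
#     $ hg debugstablesort --rev tip --template '{rev}:{node|short}\n'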