Mercurial > evolve
view hgext3rd/evolve/stablerange.py @ 2130:d784622dd5dc
stablerange: move the range class in the new module
Our ultimate goal is to remove this class for performance reason. however for
now, it contains most of the code we care about so we migrate it as a block
first.
author | Pierre-Yves David <pierre-yves.david@ens-lyon.org> |
---|---|
date | Sun, 19 Mar 2017 03:07:01 +0100 |
parents | d07bb7cbae2f |
children | 86dd39478638 |
line wrap: on
line source
# Code dedicated to the computation and properties of "stable ranges" # # These stable ranges are use for obsolescence markers discovery # # Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. import collections import math import hashlib from mercurial import ( commands, cmdutil, localrepo, node as nodemod, scmutil, util, ) from mercurial.i18n import _ from . import ( exthelper, obsolete, ) eh = exthelper.exthelper() ################################## ### Stable topological sorting ### ################################## @eh.command( 'debugstablesort', [ ('', 'rev', [], 'heads to start from'), ] + commands.formatteropts, _('')) def debugstablesort(ui, repo, **opts): """display the ::REVS set topologically sorted in a stable way """ revs = scmutil.revrange(repo, opts['rev']) displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True) for r in stablesort(repo, revs): ctx = repo[r] displayer.show(ctx) displayer.flush(ctx) displayer.close() def stablesort(repo, revs): """return '::revs' topologically sorted in "stable" order This is a depth first traversal starting from 'nullrev', using node as a tie breaker. """ # Various notes: # # * Bitbucket is used dates as tie breaker, that might be a good idea. # # * It seemds we can traverse in the same order from (one) head to bottom, # if we the following record data for each merge: # # - highest (stablesort-wise) common ancestors, # - order of parents (tablesort-wise) cl = repo.changelog parents = cl.parentrevs nullrev = nodemod.nullrev n = cl.node # step 1: We need a parents -> children mapping for 2 reasons. # # * we build the order from nullrev to tip # # * we need to detect branching children = collections.defaultdict(list) for r in cl.ancestors(revs, inclusive=True): p1, p2 = parents(r) children[p1].append(r) if p2 != nullrev: children[p2].append(r) # step two: walk back up # * pick lowest node in case of branching # * stack disregarded part of the branching # * process merge when both parents are yielded # track what changeset has been seen = [0] * (max(revs) + 2) seen[-1] = True # nullrev is known # starts from repository roots # reuse the list form the mapping as we won't need it again anyway stack = children[nullrev] if not stack: return [] if 1 < len(stack): stack.sort(key=n, reverse=True) # list of rev, maybe we should yield, but since we built a children mapping we are 'O(N)' already result = [] current = stack.pop() while current is not None or stack: if current is None: # previous iteration reached a merge or an unready merge, current = stack.pop() if seen[current]: current = None continue p1, p2 = parents(current) if not (seen[p1] and seen[p2]): # we can't iterate on this merge yet because other child is not # yielded yet (and we are topo sorting) we can discard it for now # because it will be reached from the other child. current = None continue assert not seen[current] seen[current] = True result.append(current) # could be yield, cf earlier comment cs = children[current] if not cs: current = None elif 1 == len(cs): current = cs[0] else: cs.sort(key=n, reverse=True) current = cs.pop() # proceed on smallest stack.extend(cs) # stack the rest for later assert len(result) == len(set(result)) return result ################################# ### Stable Range computation ### ################################# class stablerangecache(dict): def __init__(self): self._depthcache = {} self._subrangescache = {} def depthrev(self, repo, rev): repo = repo.unfiltered() cl = repo.changelog cache = self._depthcache nullrev = nodemod.nullrev stack = [rev] while stack: revdepth = None current = stack[-1] revdepth = cache.get(current) if revdepth is not None: stack.pop() continue p1, p2 = cl.parentrevs(current) if p1 == nullrev: # root case revdepth = 1 elif p2 == nullrev: # linear commit case parentdepth = cache.get(p1) if parentdepth is None: stack.append(p1) else: revdepth = parentdepth + 1 else: # merge case revdepth = self._depthmerge(cl, current, p1, p2, stack, cache) if revdepth is not None: cache[current] = revdepth stack.pop() # actual_depth = len(list(cl.ancestors([rev], inclusive=True))) # assert revdepth == actual_depth, (rev, revdepth, actual_depth) return revdepth @staticmethod def _depthmerge(cl, rev, p1, p2, stack, cache): # sub method to simplify the main 'depthrev' one revdepth = None depth_p1 = cache.get(p1) depth_p2 = cache.get(p2) missingparent = False if depth_p1 is None: stack.append(p1) missingparent = True if depth_p2 is None: stack.append(p2) missingparent = True if missingparent: return None # computin depth of a merge # XXX the common ancestors heads could be cached ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2)) ancrevs = [cl.rev(a) for a in ancnodes] anyunkown = False ancdepth = [] for r in ancrevs: d = cache.get(r) if d is None: anyunkown = True stack.append(r) ancdepth.append((r, d)) if anyunkown: return None if not ancrevs: # unrelated branch, (no common root) revdepth = depth_p1 + depth_p2 + 1 elif len(ancrevs) == 1: # one unique branch point: # we can compute depth without any walk depth_anc = ancdepth[0][1] revdepth = depth_p1 + (depth_p2 - depth_anc) + 1 else: # multiple ancestors, we pick one that is # * the deepest (less changeset outside of it), # * lowest revs because more chance to have descendant of other "above" anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0])) revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev])) return revdepth def rangelength(self, repo, rangeid): headrev, index = rangeid.head, rangeid.index return self.depthrev(repo, headrev) - index def subranges(self, repo, rangeid): cached = self._subrangescache.get(rangeid) if cached is not None: return cached if self.rangelength(repo, rangeid) == 1: value = [] self._subrangescache[rangeid] = value return value return None def setsubranges(self, rangeid, value): # XXX temporary cache setter as value computation are performed outside # this class reach. return self._subrangescache.get(rangeid) def _hlp2(i): """return highest power of two lower than 'i'""" return 2 ** int(math.log(i - 1, 2)) class stablerange(object): def __init__(self, repo, head, index, revs=None): self._repo = repo.unfiltered() self.head = head self.index = index if revs is not None: assert len(revs) == len(self) self._revs = revs assert index < self.depth, (head, index, self.depth, revs) def __repr__(self): return '%s %d %d %s' % (nodemod.short(self.node), self.depth, self.index, nodemod.short(self.obshash)) def __hash__(self): return self._id def __eq__(self, other): if type(self) != type(other): raise NotImplementedError() return self.stablekey == other.stablekey @util.propertycache def _id(self): return hash(self.stablekey) @util.propertycache def stablekey(self): return (self.node, self.index) @util.propertycache def node(self): return self._repo.changelog.node(self.head) def __len__(self): return self.depth - self.index @util.propertycache def depth(self): return self._repo.stablerange.depthrev(self._repo, self.head) @util.propertycache def _revs(self): r = stablesort(self._repo, [self.head])[self.index:] assert len(r) == len(self), (self.head, self.index, len(r), len(self)) return r def _slicesat(self, globalindex): localindex = globalindex - self.index cl = self._repo.changelog result = [] bottom = self._revs[:localindex] top = stablerange(self._repo, self.head, globalindex, self._revs[localindex:]) # toprootdepth = self._repo.stablerange.depthrev(self._repo, top._revs[0]) if toprootdepth + len(top) == self.depth + 1: bheads = [bottom[-1]] else: bheads = set(bottom) parentrevs = cl.parentrevs du = bheads.difference_update for r in bottom: du(parentrevs(r)) # if len(bheads) == 1: # assert 1 == len(self._repo.revs('roots(%ld)', top._revs)) if len(bheads) == 1: newhead = bottom[-1] bottomdepth = self._repo.stablerange.depthrev(self._repo, newhead) newstart = bottomdepth - len(bottom) result.append(stablerange(self._repo, newhead, newstart, bottom)) else: # assert 1 < len(bheads), (toprootdepth, len(top), len(self)) cl = self._repo.changelog for h in bheads: subset = cl.ancestors([h], inclusive=True) hrevs = [r for r in bottom if r in subset] start = self._repo.stablerange.depthrev(self._repo, h) - len(hrevs) entry = stablerange(self._repo, h, start, [r for r in bottom if r in subset]) result.append(entry) result.append(top) return result def subranges(self): cached = self._repo.stablerange.subranges(self._repo, self) if cached is not None: return cached step = _hlp2(self.depth) standard_start = 0 while standard_start < self.index and 0 < step: if standard_start + step < self.depth: standard_start += step step //= 2 if self.index == standard_start: slicesize = _hlp2(len(self)) slicepoint = self.index + slicesize else: assert standard_start < self.depth slicepoint = standard_start result = self._slicesat(slicepoint) self._repo.stablerange.setsubranges(self, result) return result @util.propertycache def obshash(self): cache = self._repo.obsstore.rangeobshashcache obshash = cache.get(self) if obshash is not None: return obshash pieces = [] nullid = nodemod.nullid if len(self) == 1: tmarkers = self._repo.obsstore.relevantmarkers([self.node]) pieces = [] for m in tmarkers: mbin = obsolete._fm1encodeonemarker(m) pieces.append(mbin) pieces.sort() else: for subrange in self.subranges(): obshash = subrange.obshash if obshash != nullid: pieces.append(obshash) sha = hashlib.sha1() # note: if there is only one subrange with actual data, we'll just # reuse the same hash. if not pieces: obshash = nodemod.nullid elif len(pieces) != 1 or obshash is None: sha = hashlib.sha1() for p in pieces: sha.update(p) obshash = cache[self] = sha.digest() return obshash @eh.reposetup def setupcache(ui, repo): class stablerangerepo(repo.__class__): @localrepo.unfilteredpropertycache def stablerange(self): return stablerangecache() repo.__class__ = stablerangerepo