view hgext3rd/evolve/stablerange.py @ 2132:d77262807411

stablerange: move standard slice point definition in main class More migration of code away from the doomed individual class.
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date Sun, 19 Mar 2017 04:43:33 +0100
parents 86dd39478638
children 323480863248
line wrap: on
line source

# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence markers discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import math
import hashlib

from mercurial import (
    commands,
    cmdutil,
    localrepo,
    node as nodemod,
    scmutil,
    util,
)

from mercurial.i18n import _

from . import (
    exthelper,
    obsolete,
)

eh = exthelper.exthelper()

##################################
### Stable topological sorting ###
##################################
@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    # resolve the '--rev' arguments into the heads the sort starts from
    heads = scmutil.revrange(repo, opts['rev'])
    shower = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    # the displayer is created in buffered mode: every changeset is shown and
    # then explicitly flushed, in the order computed by 'stablesort'
    for rev in stablesort(repo, heads):
        changectx = repo[rev]
        shower.show(changectx)
        shower.flush(changectx)
    shower.close()

def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.

    'revs' is the collection of revision numbers used as heads. The result is
    a list of revision numbers in which every revision is listed after its
    parents. An empty list is returned when there is nothing to walk (for
    example when 'revs' is empty).
    """
    # Various notes:
    #
    # * Bitbucket is using dates as tie breaker, that might be a good idea.
    #
    # * It seems we can traverse in the same order from (one) head to bottom,
    #   if we record the following data for each merge:
    #
    #  - highest (stablesort-wise) common ancestors,
    #  - order of parents (stablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step two: walk back up
    # * pick lowest node in case of branching
    # * stack disregarded part of the branching
    # * process merge when both parents are yielded

    # starts from repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        # Nothing to walk. This check must come before 'max(revs)' below,
        # which raises ValueError on an empty 'revs'.
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)

    # track what changeset has been emitted already; the extra trailing slot
    # is the one reached through index -1, i.e. nullrev
    seen = [0] * (max(revs) + 2)
    seen[-1] = True # nullrev is known

    # list of rev, maybe we should yield, but since we built a children
    # mapping we are 'O(N)' already
    result = []

    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # previous iteration reached a merge or an unready merge,
            current = stack.pop()
            if seen[current]:
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't iterate on this merge yet because other child is not
            # yielded yet (and we are topo sorting) we can discard it for now
            # because it will be reached from the other child.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current) # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            current = None
        elif 1 == len(cs):
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop() # proceed on smallest
            stack.extend(cs)   # stack the rest for later
    assert len(result) == len(set(result))
    return result

#################################
### Stable Range computation  ###
#################################

class stablerangecache(dict):
    """cache of the data needed to compute stable ranges

    Two mappings are maintained:

    * '_depthcache': revision number -> depth of that revision,
    * '_subrangescache': range -> list of the sub-ranges it is sliced into.

    The "range" objects received by the methods below are expected to expose
    'head', 'index', 'depth' and '_revs' attributes and to support 'len()',
    as the 'stablerange' class does.
    """

    def __init__(self):
        super(stablerangecache, self).__init__()
        self._depthcache = {}
        self._subrangescache = {}

    def depthrev(self, repo, rev):
        """return the depth of 'rev' (number of revisions in '::rev')

        A root has a depth of 1. The computation is iterative: revisions whose
        depth is needed but not cached yet are pushed on an explicit stack,
        and every computed depth is recorded in the cache.
        """
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    # come back to 'current' once the parent is known
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # 'rev' sits at the bottom of the stack, so it is the last revision
        # processed and 'revdepth' is its depth at this point.
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        """return the depth of the merge 'rev', or None if it must wait

        Sub method to simplify the main 'depthrev' one. When the depth of a
        parent or of a common ancestors head is missing from 'cache', the
        corresponding revisions are pushed on 'stack' and None is returned so
        that the caller retries later.
        """
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunknown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                anyunknown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunknown:
            return None
        if not ancrevs:
            # unrelated branch, (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (less changeset outside of it),
            # * lowest revs because more chance to have descendant of other
            #   "above"
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            # add the revisions of '::rev' that are missing from '::anc'
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def rangelength(self, repo, rangeid):
        """return the number of revisions in 'rangeid'

        This is the depth of the range head minus the range index.
        """
        headrev, index = rangeid.head, rangeid.index
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        """return the known sub-ranges of 'rangeid', or None if unknown

        A range of length 1 has no sub-range: an empty list is cached and
        returned for it. For longer ranges, only a value previously recorded
        in the cache is returned.
        """
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            value = []
            self._subrangescache[rangeid] = value
            return value
        return None

    def setsubranges(self, rangeid, value):
        """record 'value' as the sub-ranges of 'rangeid'"""
        # XXX temporary cache setter as value computation are performed outside
        # this class reach.
        self._subrangescache[rangeid] = value

    def _slicepoint(self, repo, rangeid):
        """return the global index at which 'rangeid' must be sliced

        Starting from 0, a "standard" start is advanced by decreasing powers
        of two (never reaching the depth of the range head) for as long as it
        stays below the range index. If the range starts exactly on that
        standard start, it is cut after the highest power of two lower than
        its length; otherwise it is cut at the standard start.
        """
        rangedepth = self.depthrev(repo, rangeid.head)
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid.index and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid.index == standard_start:
            slicesize = _hlp2(len(rangeid))
            slicepoint = rangeid.index + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        """slice 'rangeid' at 'globalindex' and return the resulting ranges

        The revisions from 'globalindex' on form the "top" range, which keeps
        the head of 'rangeid'. The revisions before it (the "bottom") form
        one range per head found among them. The returned list holds the
        bottom range(s) first and the top range last.
        """
        localindex = globalindex - rangeid.index

        cl = repo.changelog

        result = []
        bottom = rangeid._revs[:localindex]
        top = stablerange(repo, rangeid.head, globalindex,
                          rangeid._revs[localindex:])
        # shortcut based on depths only
        # NOTE(review): presumably this detects a top part stacked on a single
        # bottom head, which avoids the heads computation below -- confirm.
        toprootdepth = repo.stablerange.depthrev(repo, top._revs[0])
        if toprootdepth + len(top) == rangeid.depth + 1:
            bheads = [bottom[-1]]
        else:
            # heads of the bottom set: drop every revision that is the parent
            # of another bottom revision
            bheads = set(bottom)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottom:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottom[-1]
            bottomdepth = repo.stablerange.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottom)
            result.append(stablerange(repo, newhead, newstart, bottom))
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            for h in bheads:
                # one range per head, made of the bottom revisions that are
                # ancestors of that head
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottom if r in subset]
                start = repo.stablerange.depthrev(repo, h) - len(hrevs)
                entry = stablerange(repo, h, start, hrevs)
                result.append(entry)
        result.append(top)
        return result

def _hlp2(i):
    """return highest power of two lower than 'i'"""
    return 2 ** int(math.log(i - 1, 2))

class stablerange(object):
    """a range of revisions identified by a head and an index

    The range contains the revisions of the stable sort of '::head', from
    position 'index' on (see the '_revs' property). Two ranges are equal when
    they share the same head node and the same index.
    """

    def __init__(self, repo, head, index, revs=None):
        """create the range starting at 'index' in the stable sort of '::head'

        'revs', when provided, is the already known list of revisions of the
        range; it saves computing it on first access to '_revs'.
        """
        self._repo = repo.unfiltered()
        self.head = head
        self.index = index
        if revs is not None:
            assert len(revs) == len(self)
            self._revs = revs
        assert index < self.depth, (head, index, self.depth, revs)

    def __repr__(self):
        return '%s %d %d %s' % (nodemod.short(self.node), self.depth,
                                self.index, nodemod.short(self.obshash))

    def __hash__(self):
        return self._id

    def __eq__(self, other):
        if type(self) != type(other):
            # let Python try the reflected comparison (and eventually answer
            # "not equal") instead of breaking any comparison or dictionary
            # lookup involving an object of another type
            return NotImplemented
        return self.stablekey == other.stablekey

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not equal

    @util.propertycache
    def _id(self):
        """hash value of the range, derived from its stable key"""
        return hash(self.stablekey)

    @util.propertycache
    def stablekey(self):
        """(node of the head, index) pair identifying the range"""
        return (self.node, self.index)

    @util.propertycache
    def node(self):
        """node of the head revision"""
        return self._repo.changelog.node(self.head)

    def __len__(self):
        return self.depth - self.index

    @util.propertycache
    def depth(self):
        """depth of the head revision, as computed by the repository cache"""
        return self._repo.stablerange.depthrev(self._repo, self.head)

    @util.propertycache
    def _revs(self):
        """list of the revisions in the range, in stable sort order"""
        r = stablesort(self._repo, [self.head])[self.index:]
        assert len(r) == len(self), (self.head, self.index, len(r), len(self))
        return r

    def subranges(self):
        """return the list of ranges this range is sliced into

        The value comes from the repository level cache when available.
        Otherwise the range is sliced at its standard slice point and the
        result is recorded in that cache.
        """
        cache = self._repo.stablerange
        cached = cache.subranges(self._repo, self)
        if cached is not None:
            return cached
        slicepoint = cache._slicepoint(self._repo, self)
        result = cache._slicesrangeat(self._repo, self, slicepoint)
        self._repo.stablerange.setsubranges(self, result)
        return result

    @util.propertycache
    def obshash(self):
        """hash of the obsolescence markers relevant to the range

        * for a range holding a single revision: sha1 of the sorted encoded
          markers relevant to its node,
        * for a longer range: combination of the hashes of its sub-ranges,
          ignoring the ones equal to nullid.

        nullid is returned when there is nothing to hash.
        """
        cache = self._repo.obsstore.rangeobshashcache
        obshash = cache.get(self)
        if obshash is not None:
            return obshash
        nullid = nodemod.nullid
        if len(self) == 1:
            tmarkers = self._repo.obsstore.relevantmarkers([self.node])
            pieces = sorted(obsolete._fm1encodeonemarker(m) for m in tmarkers)
            # raw marker data always gets hashed, even a single piece of it
            reusable = False
        else:
            pieces = []
            for subrange in self.subranges():
                subhash = subrange.obshash
                if subhash != nullid:
                    pieces.append(subhash)
            reusable = True

        if not pieces:
            obshash = nullid
        elif reusable and len(pieces) == 1:
            # note: if there is only one subrange with actual data, we'll
            # just reuse the same hash. It must be taken from 'pieces': the
            # subrange holding the data is not necessarily the last one.
            obshash = pieces[0]
        else:
            sha = hashlib.sha1()
            for p in pieces:
                sha.update(p)
            obshash = cache[self] = sha.digest()
        return obshash

@eh.reposetup
def setupcache(ui, repo):
    """give 'repo' a 'stablerange' attribute holding a stablerangecache

    The class of the repository object is replaced by a subclass of its
    current class that defines the extra attribute.
    """
    baseclass = repo.__class__

    class stablerangerepo(baseclass):

        @localrepo.unfilteredpropertycache
        def stablerange(self):
            # every evaluation of the property builds a fresh, empty cache
            # NOTE(review): presumably 'unfilteredpropertycache' evaluates
            # this once and stores it on the unfiltered repository -- confirm
            return stablerangecache()

    repo.__class__ = stablerangerepo