view hgext3rd/evolve/stablerange.py @ 2130:d784622dd5dc

stablerange: move the range class into the new module. Our ultimate goal is to remove this class for performance reasons; however, for now it contains most of the code we care about, so we migrate it as a block first.
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
date Sun, 19 Mar 2017 03:07:01 +0100
parents d07bb7cbae2f
children 86dd39478638

# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence marker discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import math
import hashlib

from mercurial import (
    commands,
    cmdutil,
    localrepo,
    node as nodemod,
    scmutil,
    util,
)

from mercurial.i18n import _

from . import (
    exthelper,
    obsolete,
)

eh = exthelper.exthelper()

##################################
### Stable topological sorting ###
##################################
@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for r in stablesort(repo, revs):
        ctx = repo[r]
        displayer.show(ctx)
        displayer.flush(ctx)
    displayer.close()
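
# For instance (illustrative invocation; any option from commands.formatteropts,
# such as --template, is accepted):
#
#   $ hg debugstablesort --rev tip --template '{rev}:{node|short}\n'
#
# prints every ancestor of tip, parents before children, in the stable order
# computed by stablesort() below.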

def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.
    """
    # Various notes:
    #
    # * Bitbucket used dates as a tie breaker; that might be a good idea.
    #
    # * It seems we could traverse in the same order from (one) head to bottom,
    #   if we recorded the following data for each merge:
    #
    #   - the highest (stablesort-wise) common ancestors,
    #   - the order of the parents (stablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step 2: walk back up
    # * pick lowest node in case of branching
    # * stack disregarded part of the branching
    # * process merge when both parents are yielded

    # track which changesets have been yielded already
    seen = [0] * (max(revs) + 2)
    seen[-1] = True # nullrev is known
    # start from the repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)

    # list of revs; maybe we should yield them instead, but since we built a
    # children mapping we are 'O(N)' in memory already
    result = []

    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # the previous iteration reached a dead end or an unready merge,
            # resume from the next branch on the stack
            current = stack.pop()
            if seen[current]:
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't process this merge yet because its other parent has not
            # been yielded yet (and we are topo sorting); we can discard it for
            # now because it will be reached again from that other parent.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current) # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            current = None
        elif 1 == len(cs):
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop() # proceed on smallest
            stack.extend(cs)   # stack the rest for later
    assert len(result) == len(set(result))
    return result
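
# A minimal usage sketch (assuming an existing repo object, e.g. inside a
# command or an extension hook):
#
#   revs = scmutil.revrange(repo, ['.'])
#   ordered = stablesort(repo, revs)
#
# 'ordered' then lists every ancestor of the working directory parent, each
# parent before its children, with ties between siblings broken by comparing
# their node hashes.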

#################################
### Stable Range computation  ###
#################################

class stablerangecache(dict):

    def __init__(self):
        self._depthcache = {}
        self._subrangescache = {}

    def depthrev(self, repo, rev):
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing the depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunknown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                anyunknown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunknown:
            return None
        if not ancrevs:
            # unrelated branches (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
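            # Worked example with illustrative numbers: if p1 has depth 10, p2
            # has depth 7 and their unique branch point has depth 4, the two
            # parents only share the branch point's 4 ancestors, so the merge
            # has 10 + (7 - 4) + 1 = 14 ancestors (counting itself).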
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (fewer changesets outside of it),
            # * the lowest rev, because there is more chance to have the
            #   descendants of the others "above" it
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def rangelength(self, repo, rangeid):
        headrev, index = rangeid.head, rangeid.index
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            value = []
            self._subrangescache[rangeid] = value
            return value
        return None

    def setsubranges(self, rangeid, value):
        # XXX temporary cache setter, as the value computation is performed
        # outside of this class' reach.
        self._subrangescache[rangeid] = value
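
    # The expected calling pattern (a sketch of how stablerange.subranges()
    # below uses this cache):
    #
    #   cached = repo.stablerange.subranges(repo, rangeid)
    #   if cached is None:
    #       cached = ...  # compute the subranges, e.g. via stablerange._slicesat
    #       repo.stablerange.setsubranges(rangeid, cached)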

def _hlp2(i):
    """return highest power of two lower than 'i'"""
    return 2 ** int(math.log(i - 1, 2))
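
# A few illustrative values (not part of the original source):
#
#   _hlp2(2) == 1,  _hlp2(3) == 2,  _hlp2(5) == 4,
#   _hlp2(7) == 4,  _hlp2(12) == 8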

class stablerange(object):

    def __init__(self, repo, head, index, revs=None):
        self._repo = repo.unfiltered()
        self.head = head
        self.index = index
        if revs is not None:
            assert len(revs) == len(self)
            self._revs = revs
        assert index < self.depth, (head, index, self.depth, revs)

    def __repr__(self):
        return '%s %d %d %s' % (nodemod.short(self.node), self.depth, self.index, nodemod.short(self.obshash))

    def __hash__(self):
        return self._id

    def __eq__(self, other):
        if type(self) != type(other):
            raise NotImplementedError()
        return self.stablekey == other.stablekey

    @util.propertycache
    def _id(self):
        return hash(self.stablekey)

    @util.propertycache
    def stablekey(self):
        return (self.node, self.index)

    @util.propertycache
    def node(self):
        return self._repo.changelog.node(self.head)

    def __len__(self):
        return self.depth - self.index

    @util.propertycache
    def depth(self):
        return self._repo.stablerange.depthrev(self._repo, self.head)

    @util.propertycache
    def _revs(self):
        r = stablesort(self._repo, [self.head])[self.index:]
        assert len(r) == len(self), (self.head, self.index, len(r), len(self))
        return r

    def _slicesat(self, globalindex):
        localindex = globalindex - self.index

        cl = self._repo.changelog

        result = []
        bottom = self._revs[:localindex]
        top = stablerange(self._repo, self.head, globalindex, self._revs[localindex:])
        #
        toprootdepth = self._repo.stablerange.depthrev(self._repo, top._revs[0])
        if toprootdepth + len(top) == self.depth + 1:
            bheads = [bottom[-1]]
        else:
            bheads = set(bottom)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottom:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(self._repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottom[-1]
            bottomdepth = self._repo.stablerange.depthrev(self._repo, newhead)
            newstart = bottomdepth - len(bottom)
            result.append(stablerange(self._repo, newhead, newstart, bottom))
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(self))
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottom if r in subset]
                start = self._repo.stablerange.depthrev(self._repo, h) - len(hrevs)
                entry = stablerange(self._repo, h, start, hrevs)
                result.append(entry)
        result.append(top)
        return result

    def subranges(self):
        cached = self._repo.stablerange.subranges(self._repo, self)
        if cached is not None:
            return cached
        step = _hlp2(self.depth)
        standard_start = 0
        while standard_start < self.index and 0 < step:
            if standard_start + step < self.depth:
                standard_start += step
            step //= 2
        if self.index == standard_start:
            slicesize = _hlp2(len(self))
            slicepoint = self.index + slicesize
        else:
            assert standard_start < self.depth
            slicepoint = standard_start
        result = self._slicesat(slicepoint)
        self._repo.stablerange.setsubranges(self, result)
        return result
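
    # Worked example with illustrative numbers: for a head of depth 12, the
    # full range (head, 0) starts on a standard boundary, so it is sliced at
    # 0 + _hlp2(12) == 8, i.e. into subranges covering revisions [0, 8) plus
    # the top range (head, 8). That top range is in turn sliced at
    # 8 + _hlp2(4) == 10, i.e. into [8, 10) plus (head, 10).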

    @util.propertycache
    def obshash(self):
        cache = self._repo.obsstore.rangeobshashcache
        obshash = cache.get(self)
        if obshash is not None:
            return obshash
        pieces = []
        nullid = nodemod.nullid
        if len(self) == 1:
            tmarkers = self._repo.obsstore.relevantmarkers([self.node])
            for m in tmarkers:
                mbin = obsolete._fm1encodeonemarker(m)
                pieces.append(mbin)
            pieces.sort()
        else:
            for subrange in self.subranges():
                obshash = subrange.obshash
                if obshash != nullid:
                    pieces.append(obshash)

        # note: if there is only one subrange with actual data, we'll just
        # reuse the same hash.
        if not pieces:
            obshash = nodemod.nullid
        elif len(pieces) != 1 or obshash is None:
            sha = hashlib.sha1()
            for p in pieces:
                sha.update(p)
            obshash = cache[self] = sha.digest()
        return obshash

@eh.reposetup
def setupcache(ui, repo):

    class stablerangerepo(repo.__class__):

        @localrepo.unfilteredpropertycache
        def stablerange(self):
            return stablerangecache()

    repo.__class__ = stablerangerepo