view mercurial/namespaces.py @ 39568:842cd0bdda75

util: teach lrucachedict to enforce a max total cost Now that lrucachedict entries can have a numeric cost associated with them and we can easily pop the oldest item in the cache, it now becomes relatively trivial to implement support for enforcing a high water mark on the total cost of items in the cache. This commit teaches lrucachedict instances to have a max cost associated with them. When items are inserted, we pop old items until enough "cost" frees up to make room for the new item. This feature is close to zero cost when not used (modulo the insertion regression introduced by the previous commit): $ ./hg perflrucachedict --size 4 --gets 1000000 --sets 1000000 --mixed 1000000 ! gets ! wall 0.607444 comb 0.610000 user 0.610000 sys 0.000000 (best of 17) ! wall 0.601653 comb 0.600000 user 0.600000 sys 0.000000 (best of 17) ! inserts ! wall 0.678261 comb 0.680000 user 0.680000 sys 0.000000 (best of 14) ! wall 0.685042 comb 0.680000 user 0.680000 sys 0.000000 (best of 15) ! sets ! wall 0.808770 comb 0.800000 user 0.800000 sys 0.000000 (best of 13) ! wall 0.834241 comb 0.830000 user 0.830000 sys 0.000000 (best of 12) ! mixed ! wall 0.782441 comb 0.780000 user 0.780000 sys 0.000000 (best of 13) ! wall 0.803804 comb 0.800000 user 0.800000 sys 0.000000 (best of 13) $ hg perflrucachedict --size 1000 --gets 1000000 --sets 1000000 --mixed 1000000 ! init ! wall 0.006952 comb 0.010000 user 0.010000 sys 0.000000 (best of 418) ! gets ! wall 0.613350 comb 0.610000 user 0.610000 sys 0.000000 (best of 17) ! wall 0.617415 comb 0.620000 user 0.620000 sys 0.000000 (best of 17) ! inserts ! wall 0.701270 comb 0.700000 user 0.700000 sys 0.000000 (best of 15) ! wall 0.700516 comb 0.700000 user 0.700000 sys 0.000000 (best of 15) ! sets ! wall 0.825720 comb 0.830000 user 0.830000 sys 0.000000 (best of 13) ! wall 0.837946 comb 0.840000 user 0.830000 sys 0.010000 (best of 12) ! mixed ! wall 0.821644 comb 0.820000 user 0.820000 sys 0.000000 (best of 13) ! 
wall 0.850559 comb 0.850000 user 0.850000 sys 0.000000 (best of 12) I reckon the slight slowdown on insert is due to added if checks. For caches with total cost limiting enabled: $ hg perflrucachedict --size 4 --gets 1000000 --sets 1000000 --mixed 1000000 --costlimit 100 ! gets w/ cost limit ! wall 0.598737 comb 0.590000 user 0.590000 sys 0.000000 (best of 17) ! inserts w/ cost limit ! wall 1.694282 comb 1.700000 user 1.700000 sys 0.000000 (best of 6) ! mixed w/ cost limit ! wall 1.157655 comb 1.150000 user 1.150000 sys 0.000000 (best of 9) $ hg perflrucachedict --size 1000 --gets 1000000 --sets 1000000 --mixed 1000000 --costlimit 10000 ! gets w/ cost limit ! wall 0.598526 comb 0.600000 user 0.600000 sys 0.000000 (best of 17) ! inserts w/ cost limit ! wall 37.838315 comb 37.840000 user 37.840000 sys 0.000000 (best of 3) ! mixed w/ cost limit ! wall 18.060198 comb 18.060000 user 18.060000 sys 0.000000 (best of 3) $ hg perflrucachedict --size 1000 --gets 1000000 --sets 1000000 --mixed 1000000 --costlimit 10000 --mixedgetfreq 90 ! gets w/ cost limit ! wall 0.600024 comb 0.600000 user 0.600000 sys 0.000000 (best of 17) ! inserts w/ cost limit ! wall 37.154547 comb 37.120000 user 37.120000 sys 0.000000 (best of 3) ! mixed w/ cost limit ! wall 4.381602 comb 4.380000 user 4.370000 sys 0.010000 (best of 3) The functions we're benchmarking are slightly different, which could move numbers by a few milliseconds. But the slowdown on insert is too great to be explained by that. The slowness is due to insert heavy operations needing to call popoldest() repeatedly when the cache is at capacity. The next commit will address this. Differential Revision: https://phab.mercurial-scm.org/D4503
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 06 Sep 2018 14:04:46 -0700
parents 4c0683655599
children 2372284d9457
line wrap: on
line source

from __future__ import absolute_import

from .i18n import _
from . import (
    registrar,
    templatekw,
    util,
)

def tolist(val):
    """
    wrap a value in a one-element list, mapping None to an empty list
    """
    return [] if val is None else [val]

class namespaces(object):
    """registry through which multiple namespaces are registered and queried.
    The registered values are namespace objects; see the namespace class
    below for what such an object provides.

    """

    _names_version = 0

    def __init__(self):
        self._names = util.sortdict()
        columns = templatekw.getlogcolumns()

        # The named objects that ship with mercurial (bookmarks, tags and
        # branches) have to be set up somewhere, and this constructor is
        # that place. They are registered in exactly this order.
        self.addnamespace(namespace(
            "bookmarks", templatename="bookmark",
            logfmt=columns['bookmark'],
            listnames=lambda repo: repo._bookmarks.keys(),
            namemap=lambda repo, name: tolist(repo._bookmarks.get(name)),
            nodemap=lambda repo, node: repo.nodebookmarks(node),
            builtin=True))

        self.addnamespace(namespace(
            "tags", templatename="tag",
            logfmt=columns['tag'],
            listnames=lambda repo: [tag for tag, _node in repo.tagslist()],
            namemap=lambda repo, name: tolist(repo._tagscache.tags.get(name)),
            nodemap=lambda repo, node: repo.nodetags(node),
            deprecated={'tip'},
            builtin=True))

        self.addnamespace(namespace(
            "branches", templatename="branch",
            logfmt=columns['branch'],
            listnames=lambda repo: repo.branchmap().keys(),
            namemap=lambda repo, name: tolist(repo.branchtip(name, True)),
            nodemap=lambda repo, node: [repo[node].branch()],
            builtin=True))

    def __getitem__(self, namespace):
        """look up a registered namespace object by its name"""
        return self._names[namespace]

    def __iter__(self):
        return iter(self._names)

    def items(self):
        return self._names.iteritems()

    iteritems = items

    def addnamespace(self, namespace, order=None):
        """add a namespace object to the registry

        namespace: the namespace object to register; it is stored under its
                   'name' attribute (the plural form)
        order: optional position at which to insert the namespace
               (e.g. so that 'branches' is listed before 'bookmarks');
               when omitted the namespace is stored by plain assignment

        """
        if order is None:
            self._names[namespace.name] = namespace
        else:
            self._names.insert(order, namespace.name, namespace)

        # a keyword that already exists is left alone; only a missing one
        # gets generated below
        if namespace.name in templatekw.keywords:
            return

        templatekeyword = registrar.templatekeyword(templatekw.keywords)
        @templatekeyword(namespace.name, requires={'repo', 'ctx'})
        def generatekw(context, mapping):
            return templatekw.shownames(context, mapping, namespace.name)

    def singlenode(self, repo, name):
        """
        Resolve a name to its 'best' node. Each registered namespace is
        asked in turn, through its own singlenode() function, and the first
        truthy answer wins.

        Raises a KeyError if no namespace knows the name.
        """
        for nsname, nsobj in self._names.iteritems():
            found = nsobj.singlenode(repo, name)
            if found:
                return found
        raise KeyError(_('no such name: %s') % name)

class namespace(object):
    """a single namespace and the lookups it supports

    A namespace is essentially a generic many-to-many mapping between
    (namespaced) names and nodes. It exists so that extensions do not have to
    jam everything into tags or bookmarks, and so that internal pieces of
    mercurial (log output, tab completion, etc.) can be simpler.

    Concretely there are two mappings, names to nodes and nodes to names, and
    each of them returns a list.

    A name mapping may be handed a name that is outside its domain. It is
    expected to answer with an empty list in that case rather than raise an
    error.

    Attributes set on every namespace object:
      'name': the namespace (plural form)
      'templatename': name to use for templating (usually the singular form
                      of the plural namespace name)
      'listnames': list of all names in the namespace (usually the keys of a
                   dictionary)
      'namemap': function that takes a name and returns a list of nodes
      'nodemap': function that takes a node and returns a list of names
      'deprecated': set of names to be masked for ordinary use
      'builtin': bool indicating if this namespace is supported by core
                 Mercurial.
    """

    def __init__(self, name, templatename=None, logname=None, colorname=None,
                 logfmt=None, listnames=None, namemap=None, nodemap=None,
                 deprecated=None, builtin=False, singlenode=None):
        """set up a namespace

        name: the namespace to be registered (in plural form)
        templatename: the name to use for templating
        logname: the name to use for log output; falls back to templatename
        colorname: the name to use for colored log output; falls back to
                   logname
        logfmt: the format to use for (i18n-ed) log output; composed from
                logname when not given
        listnames: function to list all names
        namemap: function that inputs a name, output node(s)
        nodemap: function that inputs a node, output name(s)
        deprecated: set of names to be masked for ordinary use; a fresh
                    empty set when not given
        builtin: whether namespace is implemented by core Mercurial
        singlenode: function that inputs a name, output best node (or None);
                    when given it replaces the default singlenode() method
        """
        # Resolve the fallback chain first: templatename backs up logname,
        # which in turn backs up colorname and the composed log format.
        if logname is None:
            logname = templatename
        if colorname is None:
            colorname = logname
        if logfmt is None:
            # i18n: column positioning for "hg log"
            logfmt = ("%s:" % logname).ljust(13) + "%s\n"
        if deprecated is None:
            deprecated = set()

        self.name = name
        self.templatename = templatename
        self.logname = logname
        self.colorname = colorname
        self.logfmt = logfmt
        self.listnames = listnames
        self.namemap = namemap
        self.nodemap = nodemap
        if singlenode:
            # instance attribute shadows the method defined on the class
            self.singlenode = singlenode
        self.deprecated = deprecated
        self.builtin = builtin

    def names(self, repo, node):
        """return the names of this namespace that match the given node,
        as a sorted list"""
        matches = list(self.nodemap(repo, node))
        matches.sort()
        return matches

    def nodes(self, repo, name):
        """return the nodes of this namespace that match the given name,
        as a sorted list

        """
        matches = list(self.namemap(repo, name))
        matches.sort()
        return matches

    def singlenode(self, repo, name):
        """pick the best node for the given name

        Unless a replacement was passed to the constructor, the best node is
        the one with the highest revision number among those namemap returns
        for the name. None is returned when namemap yields nothing."""
        candidates = self.namemap(repo, name)
        if not candidates:
            return None
        if len(candidates) <= 1:
            return candidates[0]
        # several candidates: keep the one with the highest revision number
        changelog = repo.changelog
        newest = max(changelog.rev(candidate) for candidate in candidates)
        return changelog.node(newest)