mercurial/formatter.py
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 22 Aug 2016 21:48:50 -0700
changeset 29830 92ac2baaea86
parent 29794 4891f3b93182
child 29836 18bac830eef3
permissions -rw-r--r--
revlog: use an LRU cache for delta chain bases Profiling using statprof revealed a hotspot during changegroup application calculating delta chain bases on generaldelta repos. Essentially, revlog._addrevision() was performing a lot of redundant work tracing the delta chain as part of determining when the chain distance was acceptable. This was most pronounced when adding revisions to manifests, which can have delta chains thousands of revisions long. There was a delta chain base cache on revlogs before, but it only captured a single revision. This was acceptable before generaldelta, when _addrevision would build deltas from the previous revision and thus we'd pretty much guarantee a cache hit when resolving the delta chain base on a subsequent _addrevision call. However, it isn't suitable for generaldelta because parent revisions aren't necessarily the last processed revision. This patch converts the delta chain base cache to an LRU dict cache. The cache can hold multiple entries, so generaldelta repos have a higher chance of getting a cache hit. The impact of this change when processing changegroup additions is significant. On a generaldelta conversion of the "mozilla-unified" repo (which contains heads of the main Firefox repositories in chronological order - this means there are lots of transitions between heads in revlog order), this change has the following impact when performing an `hg unbundle` of an uncompressed bundle of the repo: before: 5:42 CPU time after: 4:34 CPU time Most of this time is saved when applying the changelog and manifest revlogs: before: 2:30 CPU time after: 1:17 CPU time That nearly a 50% reduction in CPU time applying changesets and manifests! Applying a gzipped bundle of the same repo (effectively simulating a `hg clone` over HTTP) showed a similar speedup: before: 5:53 CPU time after: 4:46 CPU time Wall time improvements were basically the same as CPU time. I didn't measure explicitly, but it feels like most of the time is saved when processing manifests. This makes sense, as large manifests tend to have very long delta chains and thus benefit the most from this cache. So, this change effectively makes changegroup application (which is used by `hg unbundle`, `hg clone`, `hg pull`, `hg unshelve`, and various other commands) significantly faster when delta chains are long (which can happen on repos with large numbers of files and thus large manifests). In theory, this change can result in more memory utilization. However, we're caching a dict of ints. At most we have 200 ints + Python object overhead per revlog. And, the cache is really only populated when performing read-heavy operations, such as adding changegroups or scanning an individual revlog. For memory bloat to be an issue, we'd need to scan/read several revisions from several revlogs all while having active references to several revlogs. I don't think there are many operations that do this, so I don't think memory bloat from the cache will be an issue.

# formatter.py - generic output formatting for mercurial
#
# Copyright 2012 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import (
    hex,
    short,
)

from . import (
    encoding,
    error,
    templatekw,
    templater,
    util,
)

pickle = util.pickle

class baseformatter(object):
    def __init__(self, ui, topic, opts):
        self._ui = ui
        self._topic = topic
        self._style = opts.get("style")
        self._template = opts.get("template")
        self._item = None
        # function to convert node to string suitable for this output
        self.hexfunc = hex
    def __nonzero__(self):
        '''return False if we're not doing real templating so we can
        skip extra work'''
        return True
    def _showitem(self):
        '''show a formatted item once all data is collected'''
        pass
    def startitem(self):
        '''begin an item in the format list'''
        if self._item is not None:
            self._showitem()
        self._item = {}
    @staticmethod
    def formatdate(date, fmt='%a %b %d %H:%M:%S %Y %1%2'):
        '''convert date tuple to appropriate format'''
        return date
    @staticmethod
    def formatdict(data, key='key', value='value', fmt='%s=%s', sep=' '):
        '''convert dict or key-value pairs to appropriate dict format'''
        # use plain dict instead of util.sortdict so that data can be
        # serialized as a builtin dict in pickle output
        return dict(data)
    @staticmethod
    def formatlist(data, name, fmt='%s', sep=' '):
        '''convert iterable to appropriate list format'''
        return list(data)
    def data(self, **data):
        '''insert data into item that's not shown in default output'''
        self._item.update(data)
    def write(self, fields, deftext, *fielddata, **opts):
        '''do default text output while assigning data to item'''
        fieldkeys = fields.split()
        assert len(fieldkeys) == len(fielddata)
        self._item.update(zip(fieldkeys, fielddata))
    def condwrite(self, cond, fields, deftext, *fielddata, **opts):
        '''do conditional write (primarily for plain formatter)'''
        fieldkeys = fields.split()
        assert len(fieldkeys) == len(fielddata)
        self._item.update(zip(fieldkeys, fielddata))
    def plain(self, text, **opts):
        '''show raw text for non-templated mode'''
        pass
    def end(self):
        '''end output for the formatter'''
        if self._item is not None:
            self._showitem()

def _iteritems(data):
    '''iterate key-value pairs in stable order'''
    if isinstance(data, dict):
        return sorted(data.iteritems())
    return data

class plainformatter(baseformatter):
    '''the default text output scheme'''
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        if ui.debugflag:
            self.hexfunc = hex
        else:
            self.hexfunc = short
    def __nonzero__(self):
        return False
    def startitem(self):
        pass
    @staticmethod
    def formatdate(date, fmt='%a %b %d %H:%M:%S %Y %1%2'):
        '''stringify date tuple in the given format'''
        return util.datestr(date, fmt)
    @staticmethod
    def formatdict(data, key='key', value='value', fmt='%s=%s', sep=' '):
        '''stringify key-value pairs separated by sep'''
        return sep.join(fmt % (k, v) for k, v in _iteritems(data))
    @staticmethod
    def formatlist(data, name, fmt='%s', sep=' '):
        '''stringify iterable separated by sep'''
        return sep.join(fmt % e for e in data)
    def data(self, **data):
        pass
    def write(self, fields, deftext, *fielddata, **opts):
        self._ui.write(deftext % fielddata, **opts)
    def condwrite(self, cond, fields, deftext, *fielddata, **opts):
        '''do conditional write'''
        if cond:
            self._ui.write(deftext % fielddata, **opts)
    def plain(self, text, **opts):
        self._ui.write(text, **opts)
    def end(self):
        pass

class debugformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._ui.write("%s = [\n" % self._topic)
    def _showitem(self):
        self._ui.write("    " + repr(self._item) + ",\n")
    def end(self):
        baseformatter.end(self)
        self._ui.write("]\n")

class pickleformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._data = []
    def _showitem(self):
        self._data.append(self._item)
    def end(self):
        baseformatter.end(self)
        self._ui.write(pickle.dumps(self._data))

def _jsonifyobj(v):
    if isinstance(v, dict):
        xs = ['"%s": %s' % (encoding.jsonescape(k), _jsonifyobj(u))
              for k, u in sorted(v.iteritems())]
        return '{' + ', '.join(xs) + '}'
    elif isinstance(v, (list, tuple)):
        return '[' + ', '.join(_jsonifyobj(e) for e in v) + ']'
    elif v is None:
        return 'null'
    elif v is True:
        return 'true'
    elif v is False:
        return 'false'
    elif isinstance(v, (int, float)):
        return str(v)
    else:
        return '"%s"' % encoding.jsonescape(v)

class jsonformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._ui.write("[")
        self._ui._first = True
    def _showitem(self):
        if self._ui._first:
            self._ui._first = False
        else:
            self._ui.write(",")

        self._ui.write("\n {\n")
        first = True
        for k, v in sorted(self._item.items()):
            if first:
                first = False
            else:
                self._ui.write(",\n")
            self._ui.write('  "%s": %s' % (k, _jsonifyobj(v)))
        self._ui.write("\n }")
    def end(self):
        baseformatter.end(self)
        self._ui.write("\n]\n")

class templateformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._topic = topic
        self._t = gettemplater(ui, topic, opts.get('template', ''))
    def _showitem(self):
        g = self._t(self._topic, ui=self._ui, **self._item)
        self._ui.write(templater.stringify(g))
    @staticmethod
    def formatdict(data, key='key', value='value', fmt='%s=%s', sep=' '):
        '''build object that can be evaluated as either plain string or dict'''
        data = util.sortdict(_iteritems(data))
        def f():
            yield plainformatter.formatdict(data, key, value, fmt, sep)
        return templatekw._hybrid(f(), data, lambda k: {key: k, value: data[k]},
                                  lambda d: fmt % (d[key], d[value]))
    @staticmethod
    def formatlist(data, name, fmt='%s', sep=' '):
        '''build object that can be evaluated as either plain string or list'''
        # name is mandatory argument for now, but it could be optional if
        # we have default template keyword, e.g. {item}
        data = list(data)
        def f():
            yield plainformatter.formatlist(data, name, fmt, sep)
        return templatekw._hybrid(f(), data, lambda x: {name: x},
                                  lambda d: fmt % d[name])

def lookuptemplate(ui, topic, tmpl):
    # looks like a literal template?
    if '{' in tmpl:
        return tmpl, None

    # perhaps a stock style?
    if not os.path.split(tmpl)[0]:
        mapname = (templater.templatepath('map-cmdline.' + tmpl)
                   or templater.templatepath(tmpl))
        if mapname and os.path.isfile(mapname):
            return None, mapname

    # perhaps it's a reference to [templates]
    t = ui.config('templates', tmpl)
    if t:
        return templater.unquotestring(t), None

    if tmpl == 'list':
        ui.write(_("available styles: %s\n") % templater.stylelist())
        raise error.Abort(_("specify a template"))

    # perhaps it's a path to a map or a template
    if ('/' in tmpl or '\\' in tmpl) and os.path.isfile(tmpl):
        # is it a mapfile for a style?
        if os.path.basename(tmpl).startswith("map-"):
            return None, os.path.realpath(tmpl)
        tmpl = open(tmpl).read()
        return tmpl, None

    # constant string?
    return tmpl, None

def gettemplater(ui, topic, spec):
    tmpl, mapfile = lookuptemplate(ui, topic, spec)
    assert not (tmpl and mapfile)
    if mapfile:
        return templater.templater.frommapfile(mapfile)
    return maketemplater(ui, topic, tmpl)

def maketemplater(ui, topic, tmpl, filters=None, cache=None):
    """Create a templater from a string template 'tmpl'"""
    aliases = ui.configitems('templatealias')
    t = templater.templater(filters=filters, cache=cache, aliases=aliases)
    if tmpl:
        t.cache[topic] = tmpl
    return t

def formatter(ui, topic, opts):
    template = opts.get("template", "")
    if template == "json":
        return jsonformatter(ui, topic, opts)
    elif template == "pickle":
        return pickleformatter(ui, topic, opts)
    elif template == "debug":
        return debugformatter(ui, topic, opts)
    elif template != "":
        return templateformatter(ui, topic, opts)
    # developer config: ui.formatdebug
    elif ui.configbool('ui', 'formatdebug'):
        return debugformatter(ui, topic, opts)
    # deprecated config: ui.formatjson
    elif ui.configbool('ui', 'formatjson'):
        return jsonformatter(ui, topic, opts)
    return plainformatter(ui, topic, opts)