view mercurial/formatter.py @ 23785:cb99bacb9b4e

branchcache: introduce revbranchcache for caching of revision branch names It is expensive to retrieve the branch name of a revision. Very expensive when creating a changectx and calling .branch() every time - slightly less when using changelog.branchinfo(). Now, to speed things up, provide a way to cache the results on disk in an efficient format. Each branchname is assigned a number, and for each revision we store the number of the corresponding branch name. The branch names are stored in a dedicated file which is strictly append only. Branch names are usually reused across several revisions, and the total list of branch names will thus be so small that it is feasible to read the whole set of names before using the cache. It will however do that it might be more efficient to use the changelog for retrieving the branch info for a single revision. The revision entries are stored in another file. This file is usually append only, but if the repository has been modified, the file will be truncated and the relevant parts rewritten on demand. The entries for each revision are 8 bytes each, and the whole revision file will thus be 1/8 of 00changelog.i. Each revision entry contains the first 4 bytes of the corresponding node hash. This is used as a check sum that always is verified before the entry is used. That check is relatively expensive but it makes sure history modification is detected and handled correctly. It will also detect and handle most revision file corruptions. This is just a cache. A new format can always be introduced if other requirements or ideas make that seem like a good idea. Rebuilding the cache is not really more expensive than it was to run for example 'hg log -b branchname' before this cache was introduced. This new method is still unused but promise to make some operations several times faster once it actually is used. Abandoning Python 2.4 would make it possible to implement this more efficiently by using struct classes and pack_into. The Python code could probably also be micro optimized or it could be implemented very efficiently in C where it would be easy to control the data access.
author Mads Kiilerich <madski@unity3d.com>
date Thu, 08 Jan 2015 00:01:03 +0100
parents cb28d2b3db0b
children 0a714a1f7d5c
line wrap: on
line source

# formatter.py - generic output formatting for mercurial
#
# Copyright 2012 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import cPickle
from node import hex, short
from i18n import _
import encoding, util

class baseformatter(object):
    def __init__(self, ui, topic, opts):
        self._ui = ui
        self._topic = topic
        self._style = opts.get("style")
        self._template = opts.get("template")
        self._item = None
        # function to convert node to string suitable for this output
        self.hexfunc = hex
    def __nonzero__(self):
        '''return False if we're not doing real templating so we can
        skip extra work'''
        return True
    def _showitem(self):
        '''show a formatted item once all data is collected'''
        pass
    def startitem(self):
        '''begin an item in the format list'''
        if self._item is not None:
            self._showitem()
        self._item = {}
    def data(self, **data):
        '''insert data into item that's not shown in default output'''
        self._item.update(data)
    def write(self, fields, deftext, *fielddata, **opts):
        '''do default text output while assigning data to item'''
        for k, v in zip(fields.split(), fielddata):
            self._item[k] = v
    def condwrite(self, cond, fields, deftext, *fielddata, **opts):
        '''do conditional write (primarily for plain formatter)'''
        for k, v in zip(fields.split(), fielddata):
            self._item[k] = v
    def plain(self, text, **opts):
        '''show raw text for non-templated mode'''
        pass
    def end(self):
        '''end output for the formatter'''
        if self._item is not None:
            self._showitem()

class plainformatter(baseformatter):
    '''the default text output scheme'''
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        if ui.debugflag:
            self.hexfunc = hex
        else:
            self.hexfunc = short
    def __nonzero__(self):
        return False
    def startitem(self):
        pass
    def data(self, **data):
        pass
    def write(self, fields, deftext, *fielddata, **opts):
        self._ui.write(deftext % fielddata, **opts)
    def condwrite(self, cond, fields, deftext, *fielddata, **opts):
        '''do conditional write'''
        if cond:
            self._ui.write(deftext % fielddata, **opts)
    def plain(self, text, **opts):
        self._ui.write(text, **opts)
    def end(self):
        pass

class debugformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._ui.write("%s = [\n" % self._topic)
    def _showitem(self):
        self._ui.write("    " + repr(self._item) + ",\n")
    def end(self):
        baseformatter.end(self)
        self._ui.write("]\n")

class pickleformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._data = []
    def _showitem(self):
        self._data.append(self._item)
    def end(self):
        baseformatter.end(self)
        self._ui.write(cPickle.dumps(self._data))

def _jsonifyobj(v):
    if isinstance(v, tuple):
        return '[' + ', '.join(_jsonifyobj(e) for e in v) + ']'
    elif v is True:
        return 'true'
    elif v is False:
        return 'false'
    elif isinstance(v, (int, float)):
        return str(v)
    else:
        return '"%s"' % encoding.jsonescape(v)

class jsonformatter(baseformatter):
    def __init__(self, ui, topic, opts):
        baseformatter.__init__(self, ui, topic, opts)
        self._ui.write("[")
        self._ui._first = True
    def _showitem(self):
        if self._ui._first:
            self._ui._first = False
        else:
            self._ui.write(",")

        self._ui.write("\n {\n")
        first = True
        for k, v in sorted(self._item.items()):
            if first:
                first = False
            else:
                self._ui.write(",\n")
            self._ui.write('  "%s": %s' % (k, _jsonifyobj(v)))
        self._ui.write("\n }")
    def end(self):
        baseformatter.end(self)
        self._ui.write("\n]\n")

def formatter(ui, topic, opts):
    template = opts.get("template", "")
    if template == "json":
        return jsonformatter(ui, topic, opts)
    elif template == "pickle":
        return pickleformatter(ui, topic, opts)
    elif template == "debug":
        return debugformatter(ui, topic, opts)
    elif template != "":
        raise util.Abort(_("custom templates not yet supported"))
    elif ui.configbool('ui', 'formatdebug'):
        return debugformatter(ui, topic, opts)
    elif ui.configbool('ui', 'formatjson'):
        return jsonformatter(ui, topic, opts)
    return plainformatter(ui, topic, opts)