mercurial/demandimport.py
author Martin von Zweigbergk <martinvonz@google.com>
Thu, 19 Mar 2015 11:08:42 -0700
changeset 24401 e6e023d57e94
parent 23643 2205d00b6d2b
child 25327 2e7804110b14
permissions -rw-r--r--
treemanifest: create treemanifest class There are a number of problems with large and flat manifests. Copying from http://mercurial.selenic.com/wiki/ManifestShardingPlan: * manifest too large for RAM * manifest resolution too much CPU (long delta chains) * committing is slow because entire manifest has to be hashed * impossible for narrow clone to leave out part of manifest as all is needed to calculate new hash * diffing two revisions involves traversing entire subdirectories even if identical This is a first step in a series introducing a manifest revlog per directory. This change adds a new manifest class: treemanifest, which is a tree where each node has a dict of files (nodeids), a dict of flags, and a dict of subdirectories (treemanifests). So far, it behaves just like manifestdict, but it will later help us write one manifest revlog per directory. The new class is still unused; it will be used after the next change. The code is not yet optimized. Running with it (see below) makes most or all operations slower. Once we start storing manifest revlogs for every directory, it should be possible to make many of these operations much faster. The fastdelta() optimization has been intentionally not implemented for the treemanifests. We can implement it later if necessary. All tests pass when run with the following patch (and without, of couse): --- a/mercurial/manifest.py Thu Mar 19 11:08:42 2015 -0700 +++ b/mercurial/manifest.py Thu Mar 19 11:15:50 2015 -0700 @@ -596,7 +596,7 @@ class manifest(revlog.revlog): return None, None def add(self, m, transaction, link, p1, p2, added, removed): - if p1 in self._mancache: + if False and p1 in self._mancache: # If our first parent is in the manifest cache, we can # compute a delta here using properties we know about the # manifest up-front, which may save time later for the @@ -626,3 +626,5 @@ class manifest(revlog.revlog): self._mancache[n] = (m, arraytext) return n + +manifestdict = treemanifest

# demandimport.py - global demand-loading of modules for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''
demandimport - automatic demandloading of modules

To enable this module, do:

  import demandimport; demandimport.enable()

Imports of the following forms will be demand-loaded:

  import a, b.c
  import a.b as c
  from a import b,c # a will be loaded immediately

These imports will not be delayed:

  from a import *
  b = __import__(a)
'''

import __builtin__, os, sys
_origimport = __import__

nothing = object()

try:
    # Python 3 doesn't have relative imports nor level -1.
    level = -1
    if sys.version_info[0] >= 3:
        level = 0
    _origimport(__builtin__.__name__, {}, {}, None, level)
except TypeError: # no level argument
    def _import(name, globals, locals, fromlist, level):
        "call _origimport with no level argument"
        return _origimport(name, globals, locals, fromlist)
else:
    _import = _origimport

def _hgextimport(importfunc, name, globals, *args):
    try:
        return importfunc(name, globals, *args)
    except ImportError:
        if not globals:
            raise
        # extensions are loaded with "hgext_" prefix
        hgextname = 'hgext_%s' % name
        nameroot = hgextname.split('.', 1)[0]
        contextroot = globals.get('__name__', '').split('.', 1)[0]
        if nameroot != contextroot:
            raise
        # retry to import with "hgext_" prefix
        return importfunc(hgextname, globals, *args)

class _demandmod(object):
    """module demand-loader and proxy"""
    def __init__(self, name, globals, locals, level=level):
        if '.' in name:
            head, rest = name.split('.', 1)
            after = [rest]
        else:
            head = name
            after = []
        object.__setattr__(self, "_data",
                           (head, globals, locals, after, level))
        object.__setattr__(self, "_module", None)
    def _extend(self, name):
        """add to the list of submodules to load"""
        self._data[3].append(name)
    def _load(self):
        if not self._module:
            head, globals, locals, after, level = self._data
            mod = _hgextimport(_import, head, globals, locals, None, level)
            # load submodules
            def subload(mod, p):
                h, t = p, None
                if '.' in p:
                    h, t = p.split('.', 1)
                if getattr(mod, h, nothing) is nothing:
                    setattr(mod, h, _demandmod(p, mod.__dict__, mod.__dict__))
                elif t:
                    subload(getattr(mod, h), t)

            for x in after:
                subload(mod, x)

            # are we in the locals dictionary still?
            if locals and locals.get(head) == self:
                locals[head] = mod
            object.__setattr__(self, "_module", mod)

    def __repr__(self):
        if self._module:
            return "<proxied module '%s'>" % self._data[0]
        return "<unloaded module '%s'>" % self._data[0]
    def __call__(self, *args, **kwargs):
        raise TypeError("%s object is not callable" % repr(self))
    def __getattribute__(self, attr):
        if attr in ('_data', '_extend', '_load', '_module'):
            return object.__getattribute__(self, attr)
        self._load()
        return getattr(self._module, attr)
    def __setattr__(self, attr, val):
        self._load()
        setattr(self._module, attr, val)

def _demandimport(name, globals=None, locals=None, fromlist=None, level=level):
    if not locals or name in ignore or fromlist == ('*',):
        # these cases we can't really delay
        return _hgextimport(_import, name, globals, locals, fromlist, level)
    elif not fromlist:
        # import a [as b]
        if '.' in name: # a.b
            base, rest = name.split('.', 1)
            # email.__init__ loading email.mime
            if globals and globals.get('__name__', None) == base:
                return _import(name, globals, locals, fromlist, level)
            # if a is already demand-loaded, add b to its submodule list
            if base in locals:
                if isinstance(locals[base], _demandmod):
                    locals[base]._extend(rest)
                return locals[base]
        return _demandmod(name, globals, locals, level)
    else:
        if level != -1:
            # from . import b,c,d or from .a import b,c,d
            return _origimport(name, globals, locals, fromlist, level)
        # from a import b,c,d
        mod = _hgextimport(_origimport, name, globals, locals)
        # recurse down the module chain
        for comp in name.split('.')[1:]:
            if getattr(mod, comp, nothing) is nothing:
                setattr(mod, comp, _demandmod(comp, mod.__dict__, mod.__dict__))
            mod = getattr(mod, comp)
        for x in fromlist:
            # set requested submodules for demand load
            if getattr(mod, x, nothing) is nothing:
                setattr(mod, x, _demandmod(x, mod.__dict__, locals))
        return mod

ignore = [
    '_hashlib',
    '_xmlplus',
    'fcntl',
    'win32com.gen_py',
    '_winreg', # 2.7 mimetypes needs immediate ImportError
    'pythoncom',
    # imported by tarfile, not available under Windows
    'pwd',
    'grp',
    # imported by profile, itself imported by hotshot.stats,
    # not available under Windows
    'resource',
    # this trips up many extension authors
    'gtk',
    # setuptools' pkg_resources.py expects "from __main__ import x" to
    # raise ImportError if x not defined
    '__main__',
    '_ssl', # conditional imports in the stdlib, issue1964
    'rfc822',
    'mimetools',
    # setuptools 8 expects this module to explode early when not on windows
    'distutils.msvc9compiler'
    ]

def isenabled():
    return __builtin__.__import__ == _demandimport

def enable():
    "enable global demand-loading of modules"
    if os.environ.get('HGDEMANDIMPORT') != 'disable':
        __builtin__.__import__ = _demandimport

def disable():
    "disable global demand-loading of modules"
    __builtin__.__import__ = _origimport