dirstate: avoid a race with multiple commits in the same process
(issue2264, issue2516)
The race happens when two commits in a row change the same file
without changing its size, *if* those two commits happen in the same
second in the same process while holding the same repo lock. For
example:
commit 1:
M a
M b
commit 2: # same process, same second, same repo lock
M b # modify b without changing its size
M c
This first manifested in transplant, which is the most common way to
do multiple commits in the same process. But it can manifest in any
script or extension that does multiple commits under the same repo
lock. (Thus, the test script tests both transplant and a custom script.)
The problem was that dirstate.status() failed to notice the change to
b when localrepo is about to do the second commit, meaning that change
gets left in the working directory. In the context of transplant, that
means either a crash ("RuntimeError: nothing committed after
transplant") or a silently inaccurate transplant, depending on whether
any other files were modified by the second transplanted changeset.
The fix is to make status() work a little harder when we have
previously marked files as clean (state 'normal') in the same process.
Specifically, dirstate.normal() adds files to self._lastnormal, and
other state-changing methods remove them. Then dirstate.status() puts
any files in self._lastnormal into state 'lookup', which will make
localrepository.status() read file contents to see if it has really
changed. So we pay a small performance penalty for the second (and
subsequent) commits in the same process, without affecting the common
case. Anything that does lots of status updates and checks in the
same process could suffer a performance hit.
Incidentally, there is a simpler fix: call dirstate.normallookup() on
every file updated by commit() at the end of the commit. The trouble
with that solution is that it imposes a performance penalty on the
common case: it means the next status-dependent hg command after every
"hg commit" will be a little bit slower. The patch here is more
complex, but only affects performance for the uncommon case.
# extensions.py - extension handling for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import imp, os
import util, cmdutil, help, error
from i18n import _, gettext
_extensions = {}
_order = []
_ignore = ['hbisect', 'bookmarks']
def extensions():
for name in _order:
module = _extensions[name]
if module:
yield name, module
def find(name):
'''return module with given extension name'''
try:
return _extensions[name]
except KeyError:
for k, v in _extensions.iteritems():
if k.endswith('.' + name) or k.endswith('/' + name):
return v
raise KeyError(name)
def loadpath(path, module_name):
module_name = module_name.replace('.', '_')
path = util.expandpath(path)
if os.path.isdir(path):
# module/__init__.py style
d, f = os.path.split(path.rstrip('/'))
fd, fpath, desc = imp.find_module(f, [d])
return imp.load_module(module_name, fd, fpath, desc)
else:
return imp.load_source(module_name, path)
def load(ui, name, path):
# unused ui argument kept for backwards compatibility
if name.startswith('hgext.') or name.startswith('hgext/'):
shortname = name[6:]
else:
shortname = name
if shortname in _ignore:
return None
if shortname in _extensions:
return _extensions[shortname]
_extensions[shortname] = None
if path:
# the module will be loaded in sys.modules
# choose an unique name so that it doesn't
# conflicts with other modules
mod = loadpath(path, 'hgext.%s' % name)
else:
def importh(name):
mod = __import__(name)
components = name.split('.')
for comp in components[1:]:
mod = getattr(mod, comp)
return mod
try:
mod = importh("hgext.%s" % name)
except ImportError:
mod = importh(name)
_extensions[shortname] = mod
_order.append(shortname)
return mod
def loadall(ui):
result = ui.configitems("extensions")
newindex = len(_order)
for (name, path) in result:
if path:
if path[0] == '!':
continue
try:
load(ui, name, path)
except KeyboardInterrupt:
raise
except Exception, inst:
if path:
ui.warn(_("*** failed to import extension %s from %s: %s\n")
% (name, path, inst))
else:
ui.warn(_("*** failed to import extension %s: %s\n")
% (name, inst))
if ui.traceback():
return 1
for name in _order[newindex:]:
uisetup = getattr(_extensions[name], 'uisetup', None)
if uisetup:
uisetup(ui)
for name in _order[newindex:]:
extsetup = getattr(_extensions[name], 'extsetup', None)
if extsetup:
try:
extsetup(ui)
except TypeError:
if extsetup.func_code.co_argcount != 0:
raise
extsetup() # old extsetup with no ui argument
def wrapcommand(table, command, wrapper):
'''Wrap the command named `command' in table
Replace command in the command table with wrapper. The wrapped command will
be inserted into the command table specified by the table argument.
The wrapper will be called like
wrapper(orig, *args, **kwargs)
where orig is the original (wrapped) function, and *args, **kwargs
are the arguments passed to it.
'''
assert hasattr(wrapper, '__call__')
aliases, entry = cmdutil.findcmd(command, table)
for alias, e in table.iteritems():
if e is entry:
key = alias
break
origfn = entry[0]
def wrap(*args, **kwargs):
return util.checksignature(wrapper)(
util.checksignature(origfn), *args, **kwargs)
wrap.__doc__ = getattr(origfn, '__doc__')
wrap.__module__ = getattr(origfn, '__module__')
newentry = list(entry)
newentry[0] = wrap
table[key] = tuple(newentry)
return entry
def wrapfunction(container, funcname, wrapper):
'''Wrap the function named funcname in container
Replace the funcname member in the given container with the specified
wrapper. The container is typically a module, class, or instance.
The wrapper will be called like
wrapper(orig, *args, **kwargs)
where orig is the original (wrapped) function, and *args, **kwargs
are the arguments passed to it.
Wrapping methods of the repository object is not recommended since
it conflicts with extensions that extend the repository by
subclassing. All extensions that need to extend methods of
localrepository should use this subclassing trick: namely,
reposetup() should look like
def reposetup(ui, repo):
class myrepo(repo.__class__):
def whatever(self, *args, **kwargs):
[...extension stuff...]
super(myrepo, self).whatever(*args, **kwargs)
[...extension stuff...]
repo.__class__ = myrepo
In general, combining wrapfunction() with subclassing does not
work. Since you cannot control what other extensions are loaded by
your end users, you should play nicely with others by using the
subclass trick.
'''
assert hasattr(wrapper, '__call__')
def wrap(*args, **kwargs):
return wrapper(origfn, *args, **kwargs)
origfn = getattr(container, funcname)
assert hasattr(origfn, '__call__')
setattr(container, funcname, wrap)
return origfn
def _disabledpaths(strip_init=False):
'''find paths of disabled extensions. returns a dict of {name: path}
removes /__init__.py from packages if strip_init is True'''
import hgext
extpath = os.path.dirname(os.path.abspath(hgext.__file__))
try: # might not be a filesystem path
files = os.listdir(extpath)
except OSError:
return {}
exts = {}
for e in files:
if e.endswith('.py'):
name = e.rsplit('.', 1)[0]
path = os.path.join(extpath, e)
else:
name = e
path = os.path.join(extpath, e, '__init__.py')
if not os.path.exists(path):
continue
if strip_init:
path = os.path.dirname(path)
if name in exts or name in _order or name == '__init__':
continue
exts[name] = path
return exts
def _disabledhelp(path):
'''retrieve help synopsis of a disabled extension (without importing)'''
try:
file = open(path)
except IOError:
return
else:
doc = help.moduledoc(file)
file.close()
if doc: # extracting localized synopsis
return gettext(doc).splitlines()[0]
else:
return _('(no help text available)')
def disabled():
'''find disabled extensions from hgext
returns a dict of {name: desc}, and the max name length'''
paths = _disabledpaths()
if not paths:
return None, 0
exts = {}
maxlength = 0
for name, path in paths.iteritems():
doc = _disabledhelp(path)
if not doc:
continue
exts[name] = doc
if len(name) > maxlength:
maxlength = len(name)
return exts, maxlength
def disabledext(name):
'''find a specific disabled extension from hgext. returns desc'''
paths = _disabledpaths()
if name in paths:
return _disabledhelp(paths[name])
def disabledcmd(ui, cmd, strict=False):
'''import disabled extensions until cmd is found.
returns (cmdname, extname, doc)'''
paths = _disabledpaths(strip_init=True)
if not paths:
raise error.UnknownCommand(cmd)
def findcmd(cmd, name, path):
try:
mod = loadpath(path, 'hgext.%s' % name)
except Exception:
return
try:
aliases, entry = cmdutil.findcmd(cmd,
getattr(mod, 'cmdtable', {}), strict)
except (error.AmbiguousCommand, error.UnknownCommand):
return
except Exception:
ui.warn(_('warning: error finding commands in %s\n') % path)
ui.traceback()
return
for c in aliases:
if c.startswith(cmd):
cmd = c
break
else:
cmd = aliases[0]
return (cmd, name, mod)
# first, search for an extension with the same name as the command
path = paths.pop(cmd, None)
if path:
ext = findcmd(cmd, cmd, path)
if ext:
return ext
# otherwise, interrogate each extension until there's a match
for name, path in paths.iteritems():
ext = findcmd(cmd, name, path)
if ext:
return ext
raise error.UnknownCommand(cmd)
def enabled():
'''return a dict of {name: desc} of extensions, and the max name length'''
exts = {}
maxlength = 0
for ename, ext in extensions():
doc = (gettext(ext.__doc__) or _('(no help text available)'))
ename = ename.split('.')[-1]
maxlength = max(len(ename), maxlength)
exts[ename] = doc.splitlines()[0].strip()
return exts, maxlength