hg-stable: mercurial/transaction.py@3c0983cc279e

i18n: cache the result of every gettext call In looking at profiler output for 'hg log' on mozilla-central, I noticed we spent a _huge_ amount of time in gettext relative to what it's doing. Caching provides a roughly 15% performance improvement even on repositories as small as hg. == hg repo on linux == Before: % cumulative self time seconds seconds name 5.05 0.19 0.19 i18n.py:62:gettext 4.84 0.18 0.18 revlog.py:88:decompress 2.95 0.17 0.11 changelog.py:201:node 2.32 0.09 0.09 ui.py:577:write 2.11 0.08 0.08 i18n.py:72:gettext 2.11 0.08 0.08 obsolete.py:196:_fm0readmarkers 1.89 0.07 0.07 obsolete.py:569:_load 1.68 0.63 0.06 localrepo.py:29:__get__ real 0m4.026s user 0m3.993s sys 0m0.034s After: % cumulative self time seconds seconds name 8.05 0.26 0.26 revlog.py:88:decompress 2.68 0.22 0.09 color.py:395:write 2.20 0.07 0.07 obsolete.py:196:_fm0readmarkers 1.95 0.06 0.06 obsolete.py:174:_fm0readmarkers 1.95 0.06 0.06 ui.py:577:write 1.95 0.06 0.06 util.py:1228:datestr 1.71 0.06 0.06 utf_8.py:16:decode 1.71 0.06 0.06 revlog.py:273:__len__ real 0m3.519s user 0m3.447s sys 0m0.073s == mozilla-central repo on linux == Before: % cumulative self time seconds seconds name 7.72 2.35 2.35 revlog.py:88:decompress 4.46 1.36 1.36 i18n.py:62:gettext 2.22 0.67 0.67 i18n.py:72:gettext 2.19 1.14 0.67 changelog.py:201:node 2.16 0.66 0.66 ui.py:577:write 1.96 0.60 0.60 utf_8.py:16:decode 1.93 1.97 0.59 color.py:395:write 1.85 0.81 0.56 changelog.py:136:tip real 0m30.822s user 0m30.660s sys 0m0.149s After: % cumulative self time seconds seconds name 9.82 2.49 2.49 revlog.py:88:decompress 2.67 1.31 0.68 localrepo.py:29:__get__ 2.57 0.65 0.65 utf_8.py:16:decode 2.48 1.01 0.63 changelog.py:201:node 2.10 0.82 0.53 changelog.py:136:tip 2.01 0.51 0.51 ui.py:577:write 1.91 0.49 0.49 util.py:1232:datestr 1.85 1.65 0.47 color.py:395:write real 0m25.619s user 0m25.446s sys 0m0.166s == cpython repo on os x = Before: % cumulative self time seconds seconds name 5.05 1.35 1.35 cmdutil.py:982:_show 4.59 1.22 1.22 revlog.py:274:__len__ 3.98 1.06 1.06 i18n.py:62:gettext 3.91 1.04 1.04 revlog.py:1016:revision 3.68 0.98 0.98 revlog.py:337:parents 3.45 0.92 0.92 revlog.py:88:decompress 2.91 0.78 0.78 revlog.py:309:rev 2.62 0.70 0.70 revlog.py:1033:revision real 0m30.414s user 0m28.145s sys 0m0.541s After: % cumulative self time seconds seconds name 7.98 1.66 1.66 cmdutil.py:982:_show 6.83 1.42 1.42 changelog.py:46:decodeextra 5.18 1.08 1.08 revlog.py:274:__len__ 3.94 0.82 0.82 revlog.py:1016:revision 3.41 0.71 0.71 revlog.py:309:rev 3.32 0.69 0.69 revlog.py:88:decompress 2.99 0.63 0.62 revlog.py:1033:revision 2.69 0.56 0.56 revlog.py:341:start real 0m22.811s user 0m21.883s sys 0m0.397s


# transaction.py - simple journaling scheme for mercurial
#
# This transaction scheme is intended to gracefully handle program
# errors and interruptions. More serious failures like system crashes
# can be recovered with an fsck-like tool. As the whole repository is
# effectively log-structured, this should amount to simply truncating
# anything that isn't referenced in the changelog.
#
# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import errno
import error, util

def active(func):
    def _active(self, *args, **kwds):
        if self.count == 0:
            raise error.Abort(_(
                'cannot use transaction when it is already committed/aborted'))
        return func(self, *args, **kwds)
    return _active

def _playback(journal, report, opener, entries, backupentries, unlink=True):
    for f, o, _ignore in entries:
        if o or not unlink:
            try:
                fp = opener(f, 'a')
                fp.truncate(o)
                fp.close()
            except IOError:
                report(_("failed to truncate %s\n") % f)
                raise
        else:
            try:
                opener.unlink(f)
            except (IOError, OSError), inst:
                if inst.errno != errno.ENOENT:
                    raise

    backupfiles = []
    for f, b, _ignore in backupentries:
        filepath = opener.join(f)
        backuppath = opener.join(b)
        try:
            util.copyfile(backuppath, filepath)
            backupfiles.append(b)
        except IOError:
            report(_("failed to recover %s\n") % f)
            raise

    opener.unlink(journal)
    backuppath = "%s.backupfiles" % journal
    if opener.exists(backuppath):
        opener.unlink(backuppath)
    for f in backupfiles:
        opener.unlink(f)

class transaction(object):
    def __init__(self, report, opener, journal, after=None, createmode=None,
            onclose=None, onabort=None):
        """Begin a new transaction

        Begins a new transaction that allows rolling back writes in the event of
        an exception.

        * `after`: called after the transaction has been committed
        * `createmode`: the mode of the journal file that will be created
        * `onclose`: called as the transaction is closing, but before it is
        closed
        * `onabort`: called as the transaction is aborting, but before any files
        have been truncated
        """
        self.count = 1
        self.usages = 1
        self.report = report
        self.opener = opener
        self.after = after
        self.onclose = onclose
        self.onabort = onabort
        self.entries = []
        self.backupentries = []
        self.map = {}
        self.backupmap = {}
        self.journal = journal
        self._queue = []
        # a dict of arguments to be passed to hooks
        self.hookargs = {}

        self.backupjournal = "%s.backupfiles" % journal
        self.file = opener.open(self.journal, "w")
        self.backupsfile = opener.open(self.backupjournal, 'w')
        if createmode is not None:
            opener.chmod(self.journal, createmode & 0666)
            opener.chmod(self.backupjournal, createmode & 0666)

        # hold file generations to be performed on commit
        self._filegenerators = {}

    def __del__(self):
        if self.journal:
            self._abort()

    @active
    def startgroup(self):
        self._queue.append(([], []))

    @active
    def endgroup(self):
        q = self._queue.pop()
        self.entries.extend(q[0])
        self.backupentries.extend(q[1])

        offsets = []
        backups = []
        for f, o, _data in q[0]:
            offsets.append((f, o))

        for f, b, _data in q[1]:
            backups.append((f, b))

        d = ''.join(['%s\0%d\n' % (f, o) for f, o in offsets])
        self.file.write(d)
        self.file.flush()

        d = ''.join(['%s\0%s\0' % (f, b) for f, b in backups])
        self.backupsfile.write(d)
        self.backupsfile.flush()

    @active
    def add(self, file, offset, data=None):
        if file in self.map or file in self.backupmap:
            return
        if self._queue:
            self._queue[-1][0].append((file, offset, data))
            return

        self.entries.append((file, offset, data))
        self.map[file] = len(self.entries) - 1
        # add enough data to the journal to do the truncate
        self.file.write("%s\0%d\n" % (file, offset))
        self.file.flush()

    @active
    def addbackup(self, file, hardlink=True, vfs=None):
        """Adds a backup of the file to the transaction

        Calling addbackup() creates a hardlink backup of the specified file
        that is used to recover the file in the event of the transaction
        aborting.

        * `file`: the file path, relative to .hg/store
        * `hardlink`: use a hardlink to quickly create the backup
        """

        if file in self.map or file in self.backupmap:
            return
        backupfile = "%s.backup.%s" % (self.journal, file)
        if vfs is None:
            vfs = self.opener
        if vfs.exists(file):
            filepath = vfs.join(file)
            backuppath = self.opener.join(backupfile)
            util.copyfiles(filepath, backuppath, hardlink=hardlink)
        else:
            self.add(file, 0)
            return

        if self._queue:
            self._queue[-1][1].append((file, backupfile))
            return

        self.backupentries.append((file, backupfile, None))
        self.backupmap[file] = len(self.backupentries) - 1
        self.backupsfile.write("%s\0%s\0" % (file, backupfile))
        self.backupsfile.flush()

    @active
    def addfilegenerator(self, genid, filenames, genfunc, order=0, vfs=None):
        """add a function to generates some files at transaction commit

        The `genfunc` argument is a function capable of generating proper
        content of each entry in the `filename` tuple.

        At transaction close time, `genfunc` will be called with one file
        object argument per entries in `filenames`.

        The transaction itself is responsible for the backup, creation and
        final write of such file.

        The `genid` argument is used to ensure the same set of file is only
        generated once. Call to `addfilegenerator` for a `genid` already
        present will overwrite the old entry.

        The `order` argument may be used to control the order in which multiple
        generator will be executed.
        """
        # For now, we are unable to do proper backup and restore of custom vfs
        # but for bookmarks that are handled outside this mechanism.
        assert vfs is None or filenames == ('bookmarks',)
        self._filegenerators[genid] = (order, filenames, genfunc, vfs)

    @active
    def find(self, file):
        if file in self.map:
            return self.entries[self.map[file]]
        if file in self.backupmap:
            return self.backupentries[self.backupmap[file]]
        return None

    @active
    def replace(self, file, offset, data=None):
        '''
        replace can only replace already committed entries
        that are not pending in the queue
        '''

        if file not in self.map:
            raise KeyError(file)
        index = self.map[file]
        self.entries[index] = (file, offset, data)
        self.file.write("%s\0%d\n" % (file, offset))
        self.file.flush()

    @active
    def nest(self):
        self.count += 1
        self.usages += 1
        return self

    def release(self):
        if self.count > 0:
            self.usages -= 1
        # if the transaction scopes are left without being closed, fail
        if self.count > 0 and self.usages == 0:
            self._abort()

    def running(self):
        return self.count > 0

    @active
    def close(self):
        '''commit the transaction'''
        # write files registered for generation
        for entry in sorted(self._filegenerators.values()):
            order, filenames, genfunc, vfs = entry
            if vfs is None:
                vfs = self.opener
            files = []
            try:
                for name in filenames:
                    # Some files are already backed up when creating the
                    # localrepo. Until this is properly fixed we disable the
                    # backup for them.
                    if name not in ('phaseroots', 'bookmarks'):
                        self.addbackup(name)
                    files.append(vfs(name, 'w', atomictemp=True))
                genfunc(*files)
            finally:
                for f in files:
                    f.close()

        if self.count == 1 and self.onclose is not None:
            self.onclose()

        self.count -= 1
        if self.count != 0:
            return
        self.file.close()
        self.backupsfile.close()
        self.entries = []
        if self.after:
            self.after()
        if self.opener.isfile(self.journal):
            self.opener.unlink(self.journal)
        if self.opener.isfile(self.backupjournal):
            self.opener.unlink(self.backupjournal)
            for _f, b, _ignore in self.backupentries:
                self.opener.unlink(b)
        self.backupentries = []
        self.journal = None

    @active
    def abort(self):
        '''abort the transaction (generally called on error, or when the
        transaction is not explicitly committed before going out of
        scope)'''
        self._abort()

    def _abort(self):
        self.count = 0
        self.usages = 0
        self.file.close()
        self.backupsfile.close()

        if self.onabort is not None:
            self.onabort()

        try:
            if not self.entries and not self.backupentries:
                if self.journal:
                    self.opener.unlink(self.journal)
                if self.backupjournal:
                    self.opener.unlink(self.backupjournal)
                return

            self.report(_("transaction abort!\n"))

            try:
                _playback(self.journal, self.report, self.opener,
                          self.entries, self.backupentries, False)
                self.report(_("rollback completed\n"))
            except Exception:
                self.report(_("rollback failed - please run hg recover\n"))
        finally:
            self.journal = None


def rollback(opener, file, report):
    """Rolls back the transaction contained in the given file

    Reads the entries in the specified file, and the corresponding
    '*.backupfiles' file, to recover from an incomplete transaction.

    * `file`: a file containing a list of entries, specifying where
    to truncate each file.  The file should contain a list of
    file\0offset pairs, delimited by newlines. The corresponding
    '*.backupfiles' file should contain a list of file\0backupfile
    pairs, delimited by \0.
    """
    entries = []
    backupentries = []

    fp = opener.open(file)
    lines = fp.readlines()
    fp.close()
    for l in lines:
        try:
            f, o = l.split('\0')
            entries.append((f, int(o), None))
        except ValueError:
            report(_("couldn't read journal entry %r!\n") % l)

    backupjournal = "%s.backupfiles" % file
    if opener.exists(backupjournal):
        fp = opener.open(backupjournal)
        data = fp.read()
        if len(data) > 0:
            parts = data.split('\0')
            for i in xrange(0, len(parts), 2):
                f, b = parts[i:i + 1]
                backupentries.append((f, b, None))

    _playback(file, report, opener, entries, backupentries)

author	Augie Fackler <raf@durin42.com>
	Fri, 17 Oct 2014 13:52:10 -0400
changeset 23031	3c0983cc279e
parent 22663	4c6198737ad8
child 23063	cd86a6707159
permissions	-rw-r--r--