mercurial/repair.py
author Greg Ward <greg@gerg.ca>
Sun, 20 Mar 2011 17:41:09 -0400
changeset 13704 a464763e99f1
parent 13702 ffd370aa050b
child 13705 73cfb7a5aa56
permissions -rw-r--r--
dirstate: avoid a race with multiple commits in the same process (issue2264, issue2516)

The race happens when two commits in a row change the same file without changing its size, *if* those two commits happen in the same second in the same process while holding the same repo lock. For example:

  commit 1:
    M a
    M b
  commit 2:  # same process, same second, same repo lock
    M b      # modify b without changing its size
    M c

This first manifested in transplant, which is the most common way to do multiple commits in the same process. But it can manifest in any script or extension that does multiple commits under the same repo lock. (Thus, the test script tests both transplant and a custom script.)

The problem was that dirstate.status() failed to notice the change to b when localrepo was about to do the second commit, meaning that change got left in the working directory. In the context of transplant, that means either a crash ("RuntimeError: nothing committed after transplant") or a silently inaccurate transplant, depending on whether any other files were modified by the second transplanted changeset.

The fix is to make status() work a little harder when we have previously marked files as clean (state 'normal') in the same process. Specifically, dirstate.normal() adds files to self._lastnormal, and other state-changing methods remove them. Then dirstate.status() puts any files in self._lastnormal into state 'lookup', which will make localrepository.status() read file contents to see if they have really changed. So we pay a small performance penalty for the second (and subsequent) commits in the same process, without affecting the common case. Anything that does lots of status updates and checks in the same process could suffer a performance hit.

Incidentally, there is a simpler fix: call dirstate.normallookup() on every file updated by commit() at the end of the commit. The trouble with that solution is that it imposes a performance penalty on the common case: it means the next status-dependent hg command after every "hg commit" will be a little bit slower. The patch here is more complex, but only affects performance for the uncommon case.

# repair.py - functions for repository repair for mercurial
#
# Copyright 2005, 2006 Chris Mason <mason@suse.com>
# Copyright 2007 Matt Mackall
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import changegroup, bookmarks
from node import nullrev, short
from i18n import _
import os

def _bundle(repo, bases, heads, node, suffix, compress=True):
    """write the changegroup between bases and heads to a backup bundle

    The bundle is stored in the repository's "strip-backup" directory
    (created when it is missing) under the name
    "<short(node)>-<suffix>.hg".  The bundle type is "HG10BZ" when
    compress is true and "HG10UN" otherwise.

    Returns whatever changegroup.writebundle() returns for that file.
    """
    # settle the on-disk format first; it does not depend on anything below
    bundletype = "HG10UN"
    if compress:
        bundletype = "HG10BZ"

    outgoing = repo.changegroupsubset(bases, heads, 'strip')

    targetdir = repo.join("strip-backup")
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)

    basename = "%s-%s.hg" % (short(node), suffix)
    target = os.path.join(targetdir, basename)
    return changegroup.writebundle(outgoing, target, bundletype)

def _collectfiles(repo, striprev):
    """find out the filelogs affected by the strip"""
    files = set()

    for x in xrange(striprev, len(repo)):
        files.update(repo[x].files())

    return sorted(files)

def _collectbrokencsets(repo, files, striprev):
    """return the changesets which will be broken by the truncation"""
    def collectone(revlog):
        startrev = count = len(revlog)
        # find the truncation point of the revlog
        for i in xrange(count):
            lrev = revlog.linkrev(i)
            if lrev >= striprev:
                startrev = i + 1
                break

        # see if any revision after that point has a linkrev less than striprev
        # (those will be broken by strip)
        for i in xrange(startrev, count):
            lrev = revlog.linkrev(i)
            if lrev < striprev:
                yield lrev

    for rev in collectone(repo.manifest):
        yield rev
    for fname in files:
        f = repo.file(fname)
        for rev in collectone(f):
            yield rev

def strip(ui, repo, node, backup="all"):
    """strip the changeset node and all of its descendants from repo

    Revisions numbered above node that are not among its descendants
    (plus any changesets reported by _collectbrokencsets) are written
    to a partial bundle before the revlogs are truncated, and added
    back from that bundle afterwards.  Bookmarks that pointed at a
    stripped revision are moved to repo['.'].

    backup selects what is kept on disk:

    - "all" (default): a full backup bundle covering node up to the
      changelog heads is written before anything is stripped;
    - "strip": no full backup is written, but the partial bundle is
      compressed and left on disk after it has been added back;
    - any other value: no full backup is written and the partial
      bundle is deleted once it has been added back.

    If anything fails after the transaction was opened, a warning
    naming the full or partial bundle is printed and the exception is
    re-raised.  repo.destroyed() is called on success.
    """
    cl = repo.changelog
    # TODO delete the undo files, and handle undo of merge sets
    striprev = cl.rev(node)

    keeppartialbundle = backup == 'strip'

    # Some revisions with rev > striprev may not be descendants of striprev.
    # We have to find these revisions and put them in a bundle, so that
    # we can restore them after the truncations.
    # To create the bundle we use repo.changegroupsubset which requires
    # the list of heads and bases of the set of interesting revisions.
    # (head = revision in the set that has no descendant in the set;
    #  base = revision in the set that has no ancestor in the set)
    tostrip = set(cl.descendants(striprev))
    tostrip.add(striprev)

    files = _collectfiles(repo, striprev)
    saverevs = set(_collectbrokencsets(repo, files, striprev))

    # compute heads
    saveheads = set(saverevs)
    for r in xrange(striprev + 1, len(cl)):
        if r not in tostrip:
            saverevs.add(r)
            saveheads.difference_update(cl.parentrevs(r))
            saveheads.add(r)
    saveheads = [cl.node(r) for r in saveheads]

    # compute base nodes
    if saverevs:
        descendants = set(cl.descendants(*saverevs))
        saverevs.difference_update(descendants)
    savebases = [cl.node(r) for r in saverevs]

    # remember which bookmarks point into the set being stripped; they
    # are only rewritten once the strip itself went through
    bm = repo._bookmarks
    updatebm = []
    for m in bm:
        rev = repo[bm[m]].rev()
        if rev in tostrip:
            updatebm.append(m)

    # create a changegroup for all the branches we need to keep
    backupfile = None
    if backup == "all":
        backupfile = _bundle(repo, [node], cl.heads(), node, 'backup')
        repo.ui.status(_("saved backup bundle to %s\n") % backupfile)
    if saveheads or savebases:
        # do not compress partial bundle if we remove it from disk later
        chgrpfile = _bundle(repo, savebases, saveheads, node, 'temp',
                            compress=keeppartialbundle)

    mfst = repo.manifest

    tr = repo.transaction("strip")
    # entries recorded before this point do not belong to the strip
    offset = len(tr.entries)

    try:
        tr.startgroup()
        cl.strip(striprev, tr)
        mfst.strip(striprev, tr)
        for fn in files:
            repo.file(fn).strip(striprev, tr)
        tr.endgroup()

        try:
            for i in xrange(offset, len(tr.entries)):
                fname, troffset, ignore = tr.entries[i]
                fp = repo.sopener(fname, 'a')
                try:
                    fp.truncate(troffset)
                finally:
                    # close explicitly instead of relying on garbage
                    # collection to release the handle
                    fp.close()
            tr.close()
        except:
            # deliberately bare: abort on any failure, then re-raise
            tr.abort()
            raise

        if saveheads or savebases:
            ui.note(_("adding branch\n"))
            f = open(chgrpfile, "rb")
            try:
                gen = changegroup.readbundle(f, chgrpfile)
                # silence internal shuffling chatter; decide once so the
                # push and the pop below always agree
                quiet = not repo.ui.verbose
                if quiet:
                    repo.ui.pushbuffer()
                try:
                    repo.addchangegroup(gen, 'strip', 'bundle:' + chgrpfile,
                                        True)
                finally:
                    # never leave the ui buffered if adding back failed
                    if quiet:
                        repo.ui.popbuffer()
            finally:
                f.close()
            if not keeppartialbundle:
                os.unlink(chgrpfile)

        for m in updatebm:
            bm[m] = repo['.'].node()
        bookmarks.write(repo)

    except:
        # deliberately bare: tell the user where the data went, re-raise
        if backupfile:
            ui.warn(_("strip failed, full bundle stored in '%s'\n")
                    % backupfile)
        elif saveheads:
            ui.warn(_("strip failed, partial bundle stored in '%s'\n")
                    % chgrpfile)
        raise

    repo.destroyed()