view hgext/split.py @ 42377:0546ead39a7e stable

manifest: avoid corruption by dropping removed files with pure (issue5801) Previously, removed files would simply be marked by overwriting the first byte with NUL and dropping their entry in `self.position`. But no effort was made to ignore them when compacting the dictionary into text form. This allowed them to slip into the manifest revision, since the code seems to be trying to minimize the string operations by copying as large a chunk as possible. As part of this, compact() walks the existing text based on entries in the `positions` list, and consumed everything up to the next position entry. This typically resulted in a ValueError complaining about unsorted manifest entries. Sometimes it seems that files do get dropped in large repos- it seems to correspond to there being a new entry that would take the same slot. A much more trivial problem is that if the only changes were removals, `_compact()` didn't even run because `__delitem__` doesn't add anything to `self.extradata`. Now there's an explicit variable to flag this, both to allow `_compact()` to run, and to avoid searching the manifest in cases where there are no removals. In practice, this behavior was mostly obscured by the check in fastdelta() which takes a different path that explicitly drops removed files if there are fewer than 1000 changes. However, timeless has a repo where after rebasing tens of commits, a totally different path[1] is taken that bypasses the change count check and hits this problem. [1] https://www.mercurial-scm.org/repo/hg/file/2338bdea4474/mercurial/manifest.py#l1511
author Matt Harbison <matt_harbison@yahoo.com>
date Thu, 23 May 2019 21:54:24 -0400
parents 42e2c7c52e1b
children 2372284d9457
line wrap: on
line source

# split.py - split a changeset into smaller ones
#
# Copyright 2015 Laurent Charignon <lcharignon@fb.com>
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""command to split a changeset into smaller ones (EXPERIMENTAL)"""

from __future__ import absolute_import

from mercurial.i18n import _

from mercurial.node import (
    nullid,
    short,
)

from mercurial import (
    bookmarks,
    cmdutil,
    commands,
    error,
    hg,
    obsolete,
    phases,
    pycompat,
    registrar,
    revsetlang,
    scmutil,
)

# allow people to use split without explicitly enabling rebase extension
from . import (
    rebase,
)

# Command table for this extension. ``command`` is the decorator created from
# it by registrar.command(); it is used below to register the ``split``
# command.
cmdtable = {}
command = registrar.command(cmdtable)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

# NOTE: the docstring of ``split`` is left untouched on purpose; developer
# notes live in comments instead.
@command('split',
    [('r', 'rev', '', _("revision to split"), _('REV')),
     ('', 'rebase', True, _('rebase descendants after split')),
    ] + cmdutil.commitopts2,
    _('hg split [--no-rebase] [[-r] REV]'),
    helpcategory=command.CATEGORY_CHANGE_MANAGEMENT, helpbasic=True)
def split(ui, repo, *revs, **opts):
    """split a changeset into smaller ones

    Repeatedly prompt changes and commit message for new changesets until there
    is nothing left in the original changeset.

    If --rev was not given, split the working directory parent.

    By default, rebase connected non-obsoleted descendants onto the new
    changeset. Use --no-rebase to avoid the rebase.
    """
    opts = pycompat.byteskwargs(opts)
    # Collect the revision specs: the --rev value (if any) first, then the
    # positional arguments.
    revlist = []
    if opts.get('rev'):
        revlist.append(opts.get('rev'))
    revlist.extend(revs)
    # Everything below, including the optional rebase at the end, runs while
    # holding repo.wlock(), repo.lock() and the 'split' transaction.
    with repo.wlock(), repo.lock(), repo.transaction('split') as tr:
        # Fall back to '.' when no revision was specified at all.
        revs = scmutil.revrange(repo, revlist or ['.'])
        if len(revs) > 1:
            raise error.Abort(_('cannot split multiple revisions'))

        rev = revs.first()
        ctx = repo[rev]
        # NOTE(review): presumably revs.first() yields None when the revset
        # is empty -- confirm. Both that case and the null revision are
        # reported as "nothing to split" with exit status 1 instead of an
        # abort.
        if rev is None or ctx.node() == nullid:
            ui.status(_('nothing to split\n'))
            return 1
        # A context without a node is reported as the working directory.
        if ctx.node() is None:
            raise error.Abort(_('cannot split working directory'))

        # rewriteutil.precheck is not very useful here because:
        # 1. null check is done above and it's more friendly to return 1
        #    instead of abort
        # 2. mergestate check is done below by cmdutil.bailifchanged
        # 3. unstable check is more complex here because of --rebase
        #
        # So only "public" check is useful and it's checked directly here.
        if ctx.phase() == phases.public:
            raise error.Abort(_('cannot split public changeset'),
                              hint=_("see 'hg help phases' for details"))

        # All descendants of rev, excluding rev itself.
        descendants = list(repo.revs('(%d::) - (%d)', rev, rev))
        alloworphaned = obsolete.isenabled(repo, obsolete.allowunstableopt)
        if opts.get('rebase'):
            # Skip obsoleted descendants and their descendants so the rebase
            # won't cause conflicts for sure.
            torebase = list(repo.revs('%ld - (%ld & obsolete())::',
                                      descendants, descendants))
            # If some descendants were filtered out above they would be left
            # behind; refuse unless the allowunstableopt option is enabled.
            if not alloworphaned and len(torebase) != len(descendants):
                raise error.Abort(_('split would leave orphaned changesets '
                                    'behind'))
        else:
            # --no-rebase: any descendant would be left behind, so refuse
            # unless the allowunstableopt option is enabled.
            if not alloworphaned and descendants:
                raise error.Abort(
                    _('cannot split changeset with children without rebase'))
            torebase = ()

        if len(ctx.parents()) > 1:
            raise error.Abort(_('cannot split a merge changeset'))

        cmdutil.bailifchanged(repo)

        # Deactivate bookmark temporarily so it won't get moved unintentionally
        bname = repo._activebookmark
        if bname and repo._bookmarks[bname] != ctx.node():
            bookmarks.deactivate(repo)

        # Remember the working directory parent so it can be restored in the
        # ``finally`` clause below.
        wnode = repo['.'].node()
        # ``top`` stays None if dosplit() raises; the cleanup below relies on
        # that to detect a failed split.
        top = None
        try:
            top = dosplit(ui, repo, tr, ctx, opts)
        finally:
            # top is None: split failed, need update --clean recovery.
            # wnode == ctx.node(): wnode split, no need to update.
            if top is None or wnode != ctx.node():
                hg.clean(repo, wnode, show_stats=False)
            # Re-activate the bookmark that was active on entry, whether or
            # not it was deactivated above.
            if bname:
                bookmarks.activate(repo, bname)
        # Only rebase when there is something to rebase and the split
        # produced a new top changeset.
        if torebase and top:
            dorebase(ui, repo, torebase, top)

def dosplit(ui, repo, tr, ctx, opts):
    """Interactively split ``ctx`` and return the last changeset created.

    The working copy parent is moved to ``ctx.p1()`` while the working copy
    keeps ``ctx``'s content. ``commands.commit`` is then run in interactive
    mode repeatedly until ``repo.status()`` reports nothing modified, added,
    removed or deleted.

    ``opts`` is updated in place with the 'edit', 'interactive' and
    'message' keys used for each commit. ``tr`` is accepted for the caller
    but is not used in this function.

    Raises error.Abort if no changeset was created.
    """
    committed = [] # [ctx]

    # Set working parent to ctx.p1(), and keep working copy as ctx's content
    if ctx.node() != repo.dirstate.p1():
        hg.clean(repo, ctx.node(), show_stats=False)
    with repo.dirstate.parentchange():
        scmutil.movedirstate(repo, ctx.p1())

    # Any modified, added, removed, deleted result means split is incomplete
    incomplete = lambda repo: any(repo.status()[:4])

    # Main split loop
    while incomplete(repo):
        if committed:
            header = (_('HG: Splitting %s. So far it has been split into:\n')
                      % short(ctx.node()))
            for c in committed:
                firstline = c.description().split('\n', 1)[0]
                header += _('HG: - %s: %s\n') % (short(c.node()), firstline)
            header += _('HG: Write commit message for the next split '
                        'changeset.\n')
        else:
            header = _('HG: Splitting %s. Write commit message for the '
                       'first split changeset.\n') % short(ctx.node())
        opts.update({
            'edit': True,
            'interactive': True,
            'message': header + ctx.description(),
        })
        prevnode = repo['.'].node()
        commands.commit(ui, repo, **pycompat.strkwargs(opts))
        newctx = repo['.']
        # A commit attempt that created nothing (e.g. every change was
        # deselected) leaves the working parent where it was. Recording that
        # parent would list a pre-existing changeset as a result of the split
        # and later hand it to cleanupnodes as a successor of ctx.
        if newctx.node() != prevnode:
            committed.append(newctx)

    if not committed:
        raise error.Abort(_('cannot split an empty revision'))

    # If the only result is ctx itself there is nothing to replace; do not
    # ask cleanupnodes to replace a node by itself.
    # NOTE(review): presumably that could discard the changeset we want to
    # keep -- confirm against scmutil.cleanupnodes.
    if len(committed) != 1 or committed[0].node() != ctx.node():
        scmutil.cleanupnodes(repo,
                             {ctx.node(): [c.node() for c in committed]},
                             operation='split', fixphase=True)

    return committed[-1]

def dorebase(ui, repo, src, destctx):
    """Rebase the revisions in ``src`` onto ``destctx``.

    Delegates to the rebase extension; both the source revisions and the
    destination are handed over as strings built by revsetlang.formatspec.
    """
    srcspec = revsetlang.formatspec('%ld', src)
    destspec = revsetlang.formatspec('%d', destctx.rev())
    rebase.rebase(ui, repo, rev=[srcspec], dest=destspec)