view hgext/strip.py @ 44363:f7459da77f23

nodemap: introduce an option to use mmap to read the nodemap mapping The performance and memory benefit is much greater if we don't have to copy all the data in memory for each piece of information. So we introduce an option (on by default) to read the data using mmap. This changeset is the last one defining the API for index support of nodemap data (indexes have to be able to use the mmapping). Below are some benchmarks comparing the best we currently have in 5.3 with the final step of this series (using the persistent nodemap implementation in Rust). The benchmarks run `hg perfindex` with various revsets and the following variants: Before: * do not use the persistent nodemap * use the CPython implementation of the index for nodemap * use mmapping of the changelog index After: * use the MixedIndex Rust code, with the NodeTree object for nodemap access (still in review) * use the persistent nodemap data from disk * access the persistent nodemap data through mmap * use mmapping of the changelog index The persistent nodemap greatly speeds up most operations on very large repositories. Some of the previously very fast lookups end up a bit slower because the persistent nodemap has to be set up. However the absolute slowdown is very small and won't matter in the big picture. Here are some numbers (in seconds) for the reference copy of mozilla-try: Revset Before After abs-change speedup -10000: 0.004622 0.005532 0.000910 × 0.83 -10: 0.000050 0.000132 0.000082 × 0.37 tip 0.000052 0.000085 0.000033 × 0.61 0 + (-10000:) 0.028222 0.005337 -0.022885 × 5.29 0 0.023521 0.000084 -0.023437 × 280.01 (-10000:) + 0 0.235539 0.005308 -0.230231 × 44.37 (-10:) + :9 0.232883 0.000180 -0.232703 ×1293.79 (-10000:) + (:99) 0.238735 0.005358 -0.233377 × 44.55 :99 + (-10000:) 0.317942 0.005593 -0.312349 × 56.84 :9 + (-10:) 0.313372 0.000179 -0.313193 ×1750.68 :9 0.316450 0.000143 -0.316307 ×2212.93 On smaller repositories, the cost of nodemap-related operations is not as big, so the win is much more modest. 
Yet it helps shave a handful of milliseconds here and there. Here are some numbers (in seconds) for the reference copy of mercurial: Revset Before After abs-change speedup -10: 0.000065 0.000097 0.000032 × 0.67 tip 0.000063 0.000078 0.000015 × 0.80 0 0.000561 0.000079 -0.000482 × 7.10 -10000: 0.004609 0.003648 -0.000961 × 1.26 0 + (-10000:) 0.005023 0.003715 -0.001307 × 1.35 (-10:) + :9 0.002187 0.000108 -0.002079 ×20.25 (-10000:) + 0 0.006252 0.003716 -0.002536 × 1.68 (-10000:) + (:99) 0.006367 0.003707 -0.002660 × 1.71 :9 + (-10:) 0.003846 0.000110 -0.003736 ×34.96 :9 0.003854 0.000099 -0.003755 ×38.92 :99 + (-10000:) 0.007644 0.003778 -0.003866 × 2.02 Differential Revision: https://phab.mercurial-scm.org/D7894
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 11 Feb 2020 11:18:52 +0100
parents 8ff1ecfadcd1
children 9d2b2df2c2ba
line wrap: on
line source

"""strip changesets and their descendants from history

This extension allows you to strip changesets and all their descendants from the
repository. See the command help for details.
"""
from __future__ import absolute_import

from mercurial.i18n import _
from mercurial.pycompat import getattr
from mercurial import (
    bookmarks as bookmarksmod,
    cmdutil,
    error,
    hg,
    lock as lockmod,
    merge,
    node as nodemod,
    pycompat,
    registrar,
    repair,
    scmutil,
    util,
)

# Short local alias for nodemod.nullid; parent nodes are compared against it
# below to detect an absent parent.
nullid = nodemod.nullid
# Short local alias for lockmod.release.
release = lockmod.release

# Table passed to registrar.command(); the resulting ``command`` object is
# used as the decorator on stripcmd below.
cmdtable = {}
command = registrar.command(cmdtable)
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'


def checklocalchanges(repo, force=False):
    """Run the pre-strip working directory checks and return the status.

    ``repo.status()`` is always computed first and is the return value.

    Without ``force``, both cmdutil.checkunfinished() and
    cmdutil.bailifchanged() are called on ``repo``. With ``force``, only
    cmdutil.checkunfinished() is called, with ``skipmerge=True``.
    """
    status = repo.status()

    if force:
        # forced: bailifchanged is skipped entirely
        cmdutil.checkunfinished(repo, skipmerge=True)
        return status

    cmdutil.checkunfinished(repo)
    cmdutil.bailifchanged(repo)
    return status


def _findupdatetarget(repo, nodes):
    """Return the node to update to before ``nodes`` are stripped.

    The starting candidate is the first parent of ``nodes[0]``. Two cases
    override it:

    - the repository has an ``mq`` attribute and the second parent of
      ``nodes[0]`` is the node of an applied mq patch: that second parent
      is returned;
    - otherwise, if the working directory branch differs from the
      candidate's branch, a revset is evaluated to look for a revision on
      the working directory's branch, and its first result (if any) is
      returned.
    """
    firstparent, secondparent = repo.changelog.parents(nodes[0])
    wdirbranch = repo[None].branch()

    if util.safehasattr(repo, b'mq') and secondparent != nullid:
        appliednodes = [entry.node for entry in repo.mq.applied]
        if secondparent in appliednodes:
            return secondparent

    if wdirbranch == repo[firstparent].branch():
        # already on the right branch, the first parent is good enough
        return firstparent

    wdirparents = b'parents(wdir())'
    spec = b'max(((parents(%ln::%r) + %r) - %ln::%r) and branch(%s))'
    candidates = repo.revs(
        spec, nodes, wdirparents, wdirparents, nodes, wdirparents, wdirbranch
    )
    if not candidates:
        return firstparent

    return repo.changelog.node(candidates.first())


def strip(
    ui,
    repo,
    revs,
    update=True,
    backup=True,
    force=None,
    bookmarks=None,
    soft=False,
):
    """Strip ``revs`` from ``repo`` while holding the wlock and the lock.

    When ``update`` is true, the working directory is dealt with first:
    checklocalchanges() is run (``force`` is forwarded to it), a target is
    chosen by _findupdatetarget(), hg.clean() is called with that target
    and the dirstate is written out.

    ``soft`` selects repair.softstrip() instead of repair.strip();
    ``backup`` is passed through to whichever one is used.

    If ``bookmarks`` is non-empty, those bookmarks are removed afterwards
    inside a ``strip`` transaction (the active bookmark is deactivated
    first when it is one of them) and each deletion is reported on ``ui``.
    """
    with repo.wlock(), repo.lock():

        if update:
            checklocalchanges(repo, force=force)
            target = _findupdatetarget(repo, revs)
            hg.clean(repo, target)
            repo.dirstate.write(repo.currenttransaction())

        dostrip = repair.softstrip if soft else repair.strip
        dostrip(ui, repo, revs, backup)

        # read the bookmark store only once the strip is done
        store = repo._bookmarks
        if not bookmarks:
            return

        with repo.transaction(b'strip') as tr:
            if repo._activebookmark in bookmarks:
                bookmarksmod.deactivate(repo)
            deletions = [(name, None) for name in bookmarks]
            store.applychanges(repo, tr, deletions)

        for name in sorted(bookmarks):
            ui.write(_(b"bookmark '%s' deleted\n") % name)


@command(
    b"strip",
    [
        (
            b'r',
            b'rev',
            [],
            _(
                b'strip specified revision (optional, '
                b'can specify revisions without this '
                b'option)'
            ),
            _(b'REV'),
        ),
        (
            b'f',
            b'force',
            None,
            _(
                b'force removal of changesets, discard '
                b'uncommitted changes (no backup)'
            ),
        ),
        (b'', b'no-backup', None, _(b'do not save backup bundle')),
        (b'', b'nobackup', None, _(b'do not save backup bundle (DEPRECATED)'),),
        (b'n', b'', None, _(b'ignored  (DEPRECATED)')),
        (
            b'k',
            b'keep',
            None,
            _(b"do not modify working directory during strip"),
        ),
        (
            b'B',
            b'bookmark',
            [],
            _(b"remove revs only reachable from given bookmark"),
            _(b'BOOKMARK'),
        ),
        (
            b'',
            b'soft',
            None,
            _(b"simply drop changesets from visible history (EXPERIMENTAL)"),
        ),
    ],
    _(b'hg strip [-k] [-f] [-B bookmark] [-r] REV...'),
    helpcategory=command.CATEGORY_MAINTENANCE,
)
def stripcmd(ui, repo, *revs, **opts):
    """strip changesets and all their descendants from the repository

    The strip command removes the specified changesets and all their
    descendants. If the working directory has uncommitted changes, the
    operation is aborted unless the --force flag is supplied, in which
    case changes will be discarded.

    If a parent of the working directory is stripped, then the working
    directory will automatically be updated to the most recent
    available ancestor of the stripped parent after the operation
    completes.

    Any stripped changesets are stored in ``.hg/strip-backup`` as a
    bundle (see :hg:`help bundle` and :hg:`help unbundle`). They can
    be restored by running :hg:`unbundle .hg/strip-backup/BUNDLE`,
    where BUNDLE is the bundle file created by the strip. Note that
    the local revision numbers will in general be different after the
    restore.

    Use the --no-backup option to discard the backup bundle once the
    operation completes.

    Strip is not a history-rewriting operation and can be used on
    changesets in the public phase. But if the stripped changesets have
    been pushed to a remote repository you will likely pull them again.

    Return 0 on success.
    """
    # from here on the options are looked up with bytes keys
    opts = pycompat.byteskwargs(opts)
    # either spelling of the flag (current or deprecated) disables the backup
    backup = True
    if opts.get(b'no_backup') or opts.get(b'nobackup'):
        backup = False

    cl = repo.changelog
    # positional revisions and -r/--rev revisions are handled the same way;
    # the result is kept as a set so bookmark-derived revisions can be added
    revs = list(revs) + opts.get(b'rev')
    revs = set(scmutil.revrange(repo, revs))

    with repo.wlock():
        bookmarks = set(opts.get(b'bookmark'))
        if bookmarks:
            repomarks = repo._bookmarks
            # abort, naming the missing ones, if any requested bookmark is
            # not in the repository
            if not bookmarks.issubset(repomarks):
                raise error.Abort(
                    _(b"bookmark '%s' not found")
                    % b','.join(sorted(bookmarks - set(repomarks.keys())))
                )

            # If the requested bookmark is not the only one pointing to
            # a revision we have to only delete the bookmark and not strip
            # anything. revsets cannot detect that case.
            nodetobookmarks = {}
            for mark, node in pycompat.iteritems(repomarks):
                nodetobookmarks.setdefault(node, []).append(mark)
            for marks in nodetobookmarks.values():
                # only when every bookmark on this node was requested do the
                # revisions returned by bookmarkrevs() get added
                if bookmarks.issuperset(marks):
                    rsrevs = scmutil.bookmarkrevs(repo, marks[0])
                    revs.update(set(rsrevs))
            if not revs:
                # nothing to strip: just delete the requested bookmarks.
                # NOTE: revs is still empty afterwards, so the check right
                # below aborts with 'empty revision set' after the
                # deletion messages have been written.
                with repo.lock(), repo.transaction(b'bookmark') as tr:
                    bmchanges = [(b, None) for b in bookmarks]
                    repomarks.applychanges(repo, tr, bmchanges)
                for bookmark in sorted(bookmarks):
                    ui.write(_(b"bookmark '%s' deleted\n") % bookmark)

        if not revs:
            raise error.Abort(_(b'empty revision set'))

        # strippedrevs: everything that goes away (requested + descendants)
        # roots: requested revisions that do not descend from another
        # requested revision
        descendants = set(cl.descendants(revs))
        strippedrevs = revs.union(descendants)
        roots = revs.difference(descendants)

        # if one of the wdir parent is stripped we'll need
        # to update away to an earlier revision
        update = any(
            p != nullid and cl.rev(p) in strippedrevs
            for p in repo.dirstate.parents()
        )

        rootnodes = set(cl.node(r) for r in roots)

        # repo.mq is only present on some repositories (presumably when the
        # mq extension is enabled -- confirm), hence the getattr default
        q = getattr(repo, 'mq', None)
        if q is not None and q.applied:
            # refresh queue state if we're about to strip
            # applied patches
            if cl.rev(repo.lookup(b'qtip')) in strippedrevs:
                q.applieddirty = True
                # start stays at 0 when no applied patch is a stripped
                # root, in which case every applied entry is dropped
                start = 0
                end = len(q.applied)
                for i, statusentry in enumerate(q.applied):
                    if statusentry.node in rootnodes:
                        # if one of the stripped roots is an applied
                        # patch, only part of the queue is stripped
                        start = i
                        break
                del q.applied[start:end]
                q.savedirty()

        # from here on ``revs`` holds the root *nodes* (sorted), not
        # revision numbers; this is what gets passed to strip()
        revs = sorted(rootnodes)
        if update and opts.get(b'keep'):
            # --keep: instead of updating the working directory, rebuild
            # the dirstate against the update target
            urev = _findupdatetarget(repo, revs)
            uctx = repo[urev]

            # only reset the dirstate for files that would actually change
            # between the working context and uctx
            descendantrevs = repo.revs(b"%d::.", uctx.rev())
            changedfiles = []
            for rev in descendantrevs:
                # blindly reset the files, regardless of what actually changed
                changedfiles.extend(repo[rev].files())

            # reset files that only changed in the dirstate too
            dirstate = repo.dirstate
            dirchanges = [f for f in dirstate if dirstate[f] != b'n']
            changedfiles.extend(dirchanges)

            repo.dirstate.rebuild(urev, uctx.manifest(), changedfiles)
            repo.dirstate.write(repo.currenttransaction())

            # clear resolve state
            merge.mergestate.clean(repo, repo[b'.'].node())

            # the dirstate was already moved above, so strip() must not
            # run its own update step
            update = False

        strip(
            ui,
            repo,
            revs,
            backup=backup,
            update=update,
            force=opts.get(b'force'),
            bookmarks=bookmarks,
            soft=opts[b'soft'],
        )

    return 0