hgext/largefiles/lfcommands.py
author Martin von Zweigbergk <martinvonz@google.com>
Mon, 21 Mar 2022 14:21:10 -0700
branchstable
changeset 48817 9120c0cd935c
parent 48180 df3021c1f093
child 48966 6000f5b25c9b
permissions -rw-r--r--
unamend: abort if commit was not created by `hg [un]amend` `hg unamend` can currently undo any kind of rewrite, as long as it has an obsmarker. However, that has quite unexpected results if you run it after e.g. `hg rebase` (expecting it to behave like a generic `hg undo` command), because it updates to the predecessor and leaves the old changes in the working copy. I think it's better to allow `hg unamend` only after `hg amend` (and after `hg unamend` because that's documented as being supported). Differential Revision: https://phab.mercurial-scm.org/D12390

# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''High-level command function for lfconvert, plus the cmdtable.'''
from __future__ import absolute_import

import errno
import os
import shutil

from mercurial.i18n import _
from mercurial.node import (
    bin,
    hex,
)

from mercurial import (
    cmdutil,
    context,
    error,
    exthelper,
    hg,
    lock,
    logcmdutil,
    match as matchmod,
    pycompat,
    scmutil,
    util,
)
from mercurial.utils import hashutil

from ..convert import (
    convcmd,
    filemap,
)

from . import lfutil, storefactory

release = lock.release

# -- Commands ----------------------------------------------------------

eh = exthelper.exthelper()


@eh.command(
    b'lfconvert',
    [
        (
            b's',
            b'size',
            b'',
            _(b'minimum size (MB) for files to be converted as largefiles'),
            b'SIZE',
        ),
        (
            b'',
            b'to-normal',
            False,
            _(b'convert from a largefiles repo to a normal repo'),
        ),
    ],
    _(b'hg lfconvert SOURCE DEST [FILE ...]'),
    norepo=True,
    inferrepo=True,
)
def lfconvert(ui, src, dest, *pats, **opts):
    """convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all."""

    opts = pycompat.byteskwargs(opts)
    if opts[b'to_normal']:
        tolfile = False
    else:
        tolfile = True
        size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None)

    if not hg.islocal(src):
        raise error.Abort(_(b'%s is not a local Mercurial repo') % src)
    if not hg.islocal(dest):
        raise error.Abort(_(b'%s is not a local Mercurial repo') % dest)

    rsrc = hg.repository(ui, src)
    ui.status(_(b'initializing destination %s\n') % dest)
    rdst = hg.repository(ui, dest, create=True)

    success = False
    dstwlock = dstlock = None
    try:
        # Get a list of all changesets in the source.  The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (
            rsrc[ctx]
            for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0]
        )
        revmap = {rsrc.nullid: rdst.nullid}
        if tolfile:
            # Lock destination to prevent modification while it is converted to.
            # Don't need to lock src because we are just reading from its
            # history which can't change.
            dstwlock = rdst.wlock()
            dstlock = rdst.lock()

            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.configlist(lfutil.longname, b'patterns')
            if pats:
                matcher = matchmod.match(rsrc.root, b'', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            with ui.makeprogress(
                _(b'converting revisions'),
                unit=_(b'revisions'),
                total=rsrc[b'tip'].rev(),
            ) as progress:
                for ctx in ctxs:
                    progress.update(ctx.rev())
                    _lfconvert_addchangeset(
                        rsrc,
                        rdst,
                        ctx,
                        revmap,
                        lfiles,
                        normalfiles,
                        matcher,
                        size,
                        lfiletohash,
                    )

            if rdst.wvfs.exists(lfutil.shortname):
                rdst.wvfs.rmtree(lfutil.shortname)

            for f in lfiletohash.keys():
                if rdst.wvfs.isfile(f):
                    rdst.wvfs.unlink(f)
                try:
                    rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
                except OSError:
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add(b'largefiles')
                scmutil.writereporequirements(rdst)
        else:

            class lfsource(filemap.filemap_source):
                def __init__(self, ui, source):
                    super(lfsource, self).__init__(ui, source, None)
                    self.filemapper.rename[lfutil.shortname] = b'.'

                def getfile(self, name, rev):
                    realname, realrev = rev
                    f = super(lfsource, self).getfile(name, rev)

                    if (
                        not realname.startswith(lfutil.shortnameslash)
                        or f[0] is None
                    ):
                        return f

                    # Substitute in the largefile data for the hash
                    hash = f[0].strip()
                    path = lfutil.findfile(rsrc, hash)

                    if path is None:
                        raise error.Abort(
                            _(b"missing largefile for '%s' in %s")
                            % (realname, realrev)
                        )
                    return util.readfile(path), f[1]

            class converter(convcmd.converter):
                def __init__(self, ui, source, dest, revmapfile, opts):
                    src = lfsource(ui, source)

                    super(converter, self).__init__(
                        ui, src, dest, revmapfile, opts
                    )

            found, missing = downloadlfiles(ui, rsrc)
            if missing != 0:
                raise error.Abort(_(b"all largefiles must be present locally"))

            orig = convcmd.converter
            convcmd.converter = converter

            try:
                convcmd.convert(
                    ui, src, dest, source_type=b'hg', dest_type=b'hg'
                )
            finally:
                convcmd.converter = orig
        success = True
    finally:
        if tolfile:
            rdst.dirstate.clear()
            release(dstlock, dstwlock)
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)


def _lfconvert_addchangeset(
    rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash
):
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the largefile-ness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.copysource()
                if renamed is None:
                    # the code below assumes renamed to be a boolean or a list
                    # and won't quite work with the value None
                    renamed = False
                renamedlfile = renamed and renamed in lfiles
                islfile |= renamedlfile
                if b'l' in fctx.flags():
                    if renamedlfile:
                        raise error.Abort(
                            _(b'renamed/copied largefile %s becomes symlink')
                            % f
                        )
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            fstandin = lfutil.standin(f)
            dstfiles.append(fstandin)
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if b'l' in fctx.flags():
                    renamed = fctx.copysource()
                    if renamed and renamed in lfiles:
                        raise error.Abort(
                            _(b'largefile %s becomes symlink') % f
                        )

                # largefile was modified, update standins
                m = hashutil.sha1(b'')
                m.update(ctx[f].data())
                hash = hex(m.digest())
                if f not in lfiletohash or lfiletohash[f] != hash:
                    rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
                    executable = b'x' in ctx[f].flags()
                    lfutil.writestandin(rdst, fstandin, hash, executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        srcfname = lfutil.splitstandin(f)
        if srcfname is not None:
            # if the file isn't in the manifest then it was removed
            # or renamed, return None to indicate this
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                return None
            renamed = fctx.copysource()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed)

            return context.memfilectx(
                repo,
                memctx,
                f,
                lfiletohash[srcfname] + b'\n',
                b'l' in fctx.flags(),
                b'x' in fctx.flags(),
                renamed,
            )
        else:
            return _getnormalcontext(repo, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)


def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
    mctx = context.memctx(
        rdst,
        parents,
        ctx.description(),
        dstfiles,
        getfilectx,
        ctx.user(),
        ctx.date(),
        ctx.extra(),
    )
    ret = rdst.commitctx(mctx)
    lfutil.copyalltostore(rdst, ret)
    rdst.setparents(ret)
    revmap[ctx.node()] = rdst.changelog.tip()


# Generate list of changed files
def _getchangedfiles(ctx, parents):
    files = set(ctx.files())
    if ctx.repo().nullid not in parents:
        mc = ctx.manifest()
        for pctx in ctx.parents():
            for fn in pctx.manifest().diff(mc):
                files.add(fn)
    return files


# Convert src parents to dst parents
def _convertparents(ctx, revmap):
    parents = []
    for p in ctx.parents():
        parents.append(revmap[p.node()])
    while len(parents) < 2:
        parents.append(ctx.repo().nullid)
    return parents


# Get memfilectx for a normal file
def _getnormalcontext(repo, ctx, f, revmap):
    try:
        fctx = ctx.filectx(f)
    except error.LookupError:
        return None
    renamed = fctx.copysource()

    data = fctx.data()
    if f == b'.hgtags':
        data = _converttags(repo.ui, revmap, data)
    return context.memfilectx(
        repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed
    )


# Remap tag data using a revision map
def _converttags(ui, revmap, data):
    newdata = []
    for line in data.splitlines():
        try:
            id, name = line.split(b' ', 1)
        except ValueError:
            ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line)
            continue
        try:
            newid = bin(id)
        except TypeError:
            ui.warn(_(b'skipping incorrectly formatted id %s\n') % id)
            continue
        try:
            newdata.append(b'%s %s\n' % (hex(revmap[newid]), name))
        except KeyError:
            ui.warn(_(b'no mapping for id %s\n') % id)
            continue
    return b''.join(newdata)


def _islfile(file, ctx, matcher, size):
    """Return true if file should be considered a largefile, i.e.
    matcher matches it or it is larger than size."""
    # never store special .hg* files as largefiles
    if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs':
        return False
    if matcher and matcher(file):
        return True
    try:
        return ctx.filectx(file).size() >= size * 1024 * 1024
    except error.LookupError:
        return False


def uploadlfiles(ui, rsrc, rdst, files):
    '''upload largefiles to the central store'''

    if not files:
        return

    store = storefactory.openstore(rsrc, rdst, put=True)

    at = 0
    ui.debug(b"sending statlfile command for %d largefiles\n" % len(files))
    retval = store.exists(files)
    files = [h for h in files if not retval[h]]
    ui.debug(b"%d largefiles need to be uploaded\n" % len(files))

    with ui.makeprogress(
        _(b'uploading largefiles'), unit=_(b'files'), total=len(files)
    ) as progress:
        for hash in files:
            progress.update(at)
            source = lfutil.findfile(rsrc, hash)
            if not source:
                raise error.Abort(
                    _(
                        b'largefile %s missing from store'
                        b' (needs to be uploaded)'
                    )
                    % hash
                )
            # XXX check for errors here
            store.put(source, hash)
            at += 1


def verifylfiles(ui, repo, all=False, contents=False):
    """Verify that every largefile revision in the current changeset
    exists in the central store.  With --contents, also verify that
    the contents of each local largefile file revision are correct (SHA-1 hash
    matches the revision ID).  With --all, check every changeset in
    this repository."""
    if all:
        revs = repo.revs(b'all()')
    else:
        revs = [b'.']

    store = storefactory.openstore(repo)
    return store.verify(revs, contents=contents)


def cachelfiles(ui, repo, node, filelist=None):
    """cachelfiles ensures that all largefiles needed by the specified revision
    are present in the repository's largefile cache.

    returns a tuple (cached, missing).  cached is the list of files downloaded
    by this operation; missing is the list of files that were needed but could
    not be found."""
    lfiles = lfutil.listlfiles(repo, node)
    if filelist:
        lfiles = set(lfiles) & set(filelist)
    toget = []

    ctx = repo[node]
    for lfile in lfiles:
        try:
            expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
        except IOError as err:
            if err.errno == errno.ENOENT:
                continue  # node must be None and standin wasn't found in wctx
            raise
        if not lfutil.findfile(repo, expectedhash):
            toget.append((lfile, expectedhash))

    if toget:
        store = storefactory.openstore(repo)
        ret = store.get(toget)
        return ret

    return ([], [])


def downloadlfiles(ui, repo):
    tonode = repo.changelog.node
    totalsuccess = 0
    totalmissing = 0
    for rev in repo.revs(b'file(%s)', b'path:' + lfutil.shortname):
        success, missing = cachelfiles(ui, repo, tonode(rev))
        totalsuccess += len(success)
        totalmissing += len(missing)
    ui.status(_(b"%d additional largefiles cached\n") % totalsuccess)
    if totalmissing > 0:
        ui.status(_(b"%d largefiles failed to download\n") % totalmissing)
    return totalsuccess, totalmissing


def updatelfiles(
    ui, repo, filelist=None, printmessage=None, normallookup=False
):
    """Update largefiles according to standins in the working directory

    If ``printmessage`` is other than ``None``, it means "print (or
    ignore, for false) message forcibly".
    """
    statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
    with repo.wlock():
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            filelist = set(filelist)
            lfiles = [f for f in lfiles if f in filelist]

        with lfdirstate.parentchange():
            update = {}
            dropped = set()
            updated, removed = 0, 0
            wvfs = repo.wvfs
            wctx = repo[None]
            for lfile in lfiles:
                lfileorig = os.path.relpath(
                    scmutil.backuppath(ui, repo, lfile), start=repo.root
                )
                standin = lfutil.standin(lfile)
                standinorig = os.path.relpath(
                    scmutil.backuppath(ui, repo, standin), start=repo.root
                )
                if wvfs.exists(standin):
                    if wvfs.exists(standinorig) and wvfs.exists(lfile):
                        shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig))
                        wvfs.unlinkpath(standinorig)
                    expecthash = lfutil.readasstandin(wctx[standin])
                    if expecthash != b'':
                        if lfile not in wctx:  # not switched to normal file
                            if repo.dirstate.get_entry(standin).any_tracked:
                                wvfs.unlinkpath(lfile, ignoremissing=True)
                            else:
                                dropped.add(lfile)

                        # use normallookup() to allocate an entry in largefiles
                        # dirstate to prevent lfilesrepo.status() from reporting
                        # missing files as removed.
                        lfdirstate.update_file(
                            lfile,
                            p1_tracked=True,
                            wc_tracked=True,
                            possibly_dirty=True,
                        )
                        update[lfile] = expecthash
                else:
                    # Remove lfiles for which the standin is deleted, unless the
                    # lfile is added to the repository again. This happens when a
                    # largefile is converted back to a normal file: the standin
                    # disappears, but a new (normal) file appears as the lfile.
                    if (
                        wvfs.exists(lfile)
                        and repo.dirstate.normalize(lfile) not in wctx
                    ):
                        wvfs.unlinkpath(lfile)
                        removed += 1

        # largefile processing might be slow and be interrupted - be prepared
        lfdirstate.write(repo.currenttransaction())

        if lfiles:
            lfiles = [f for f in lfiles if f not in dropped]

            for f in dropped:
                repo.wvfs.unlinkpath(lfutil.standin(f))
                # This needs to happen for dropped files, otherwise they stay in
                # the M state.
                lfdirstate._map.reset_state(f)

            statuswriter(_(b'getting changed largefiles\n'))
            cachelfiles(ui, repo, None, lfiles)

        with lfdirstate.parentchange():
            for lfile in lfiles:
                update1 = 0

                expecthash = update.get(lfile)
                if expecthash:
                    if not lfutil.copyfromcache(repo, expecthash, lfile):
                        # failed ... but already removed and set to normallookup
                        continue
                    # Synchronize largefile dirstate to the last modified
                    # time of the file
                    lfdirstate.update_file(
                        lfile, p1_tracked=True, wc_tracked=True
                    )
                    update1 = 1

                # copy the exec mode of largefile standin from the repository's
                # dirstate to its state in the lfdirstate.
                standin = lfutil.standin(lfile)
                if wvfs.exists(standin):
                    # exec is decided by the users permissions using mask 0o100
                    standinexec = wvfs.stat(standin).st_mode & 0o100
                    st = wvfs.stat(lfile)
                    mode = st.st_mode
                    if standinexec != mode & 0o100:
                        # first remove all X bits, then shift all R bits to X
                        mode &= ~0o111
                        if standinexec:
                            mode |= (mode >> 2) & 0o111 & ~util.umask
                        wvfs.chmod(lfile, mode)
                        update1 = 1

                updated += update1

                lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)

        lfdirstate.write(repo.currenttransaction())
        if lfiles:
            statuswriter(
                _(b'%d largefiles updated, %d removed\n') % (updated, removed)
            )


@eh.command(
    b'lfpull',
    [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))]
    + cmdutil.remoteopts,
    _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'),
)
def lfpull(ui, repo, source=b"default", **opts):
    """pull largefiles for the specified revisions from the specified source

    Pull largefiles that are referenced from local changesets but missing
    locally, pulling from a remote repository to the local cache.

    If SOURCE is omitted, the 'default' path will be used.
    See :hg:`help urls` for more information.

    .. container:: verbose

      Some examples:

      - pull largefiles for all branch heads::

          hg lfpull -r "head() and not closed()"

      - pull largefiles on the default branch::

          hg lfpull -r "branch(default)"
    """
    repo.lfpullsource = source

    revs = opts.get('rev', [])
    if not revs:
        raise error.Abort(_(b'no revisions specified'))
    revs = logcmdutil.revrange(repo, revs)

    numcached = 0
    for rev in revs:
        ui.note(_(b'pulling largefiles for revision %d\n') % rev)
        (cached, missing) = cachelfiles(ui, repo, rev)
        numcached += len(cached)
    ui.status(_(b"%d largefiles cached\n") % numcached)


@eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE'))
def debuglfput(ui, repo, filepath, **kwargs):
    hash = lfutil.hashfile(filepath)
    storefactory.openstore(repo).put(filepath, hash)
    ui.write(b'%s\n' % hash)
    return 0