view hgext/automv.py @ 51928:ad83e4f9b40e

typing: correct pytype mistakes in `mercurial/vfs.py` With the previous changes in this series (prior to merging the *.pyi file), this wasn't too bad- the only definitively wrong things were the `data` argument to `writelines()`, and the return type on `backgroundclosing()` (both of these errors were dropped in the previous commit; for some reason pytype doesn't like `contextlib._GeneratorContextManager`, even though that's what it determined it is): File "/mnt/c/Users/Matt/hg/mercurial/vfs.py", line 411, in abstractvfs: Bad return type 'contextlib._GeneratorContextManager' for generator function abstractvfs.backgroundclosing [bad-yield-annotation] Expected Generator, Iterable or Iterator PyCharm thinks this is `Generator[backgroundfilecloser], Any, None]`, which can be reduced to `Iterator[backgroundfilecloser]`, but pytype flagged the line that calls `yield` without an argument unless it's also `Optional`. PyCharm is happy either way. For some reason, `Iterable` didn't work for pytype: File "/mnt/c/Users/Matt/hg/mercurial/vfs.py", line 390, in abstractvfs: Function contextlib.contextmanager was called with the wrong arguments [wrong-arg-types] Expected: (func: Callable[[Any], Iterator]) Actually passed: (func: Callable[[Any, Any, Any], Iterable[Optional[Any]]]) Attributes of protocol Iterator[_T_co] are not implemented on Iterable[Optional[Any]]: __next__
author Matt Harbison <matt_harbison@yahoo.com>
date Fri, 20 Sep 2024 16:36:28 -0400
parents f4733654f144
children
line wrap: on
line source

# automv.py
#
# Copyright 2013-2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""check for unrecorded moves at commit time (EXPERIMENTAL)

This extension checks at commit/amend time if any of the committed files
comes from an unrecorded mv.

The threshold at which a file is considered a move can be set with the
``automv.similarity`` config option. This option takes a percentage between 0
(disabled) and 100 (files must be identical), the default is 95.

"""

# Using 95 as the default similarity is based on an analysis of the Mercurial
# repositories of cpython, mozilla-central & mercurial, as well as 2 very
# large facebook repositories. At 95, 50% of all potential missed moves would
# be caught, and the threshold corresponds with 87% of all explicitly marked
# moves.  Together, 80% of moved files are 95% similar or more.
#
#
# See http://markmail.org/thread/5pxnljesvufvom57 for context.

from __future__ import annotations

from mercurial.i18n import _
from mercurial import (
    commands,
    copies,
    error,
    extensions,
    pycompat,
    registrar,
    scmutil,
    similar,
)

# Table that collects this extension's config declarations; ``configitem``
# is the registration callable bound to it.
configtable = {}
configitem = registrar.configitem(configtable)

# ``automv.similarity`` is read with ``ui.configint`` at commit time and
# falls back to 95 when unset.
configitem(b'automv', b'similarity', default=95)


def extsetup(ui):
    """Wrap the ``commit`` command with ``mvcheck`` and add ``--no-automv``.

    ``ui`` is accepted as part of the hook signature but is not used here.
    """
    wrapped = extensions.wrapcommand(commands.table, b'commit', mvcheck)
    # NOTE(review): the tuple presumably follows Mercurial's command option
    # layout (short name, long name, default, help text) -- confirm against
    # the command table documentation.
    noautomv_flag = (
        b'',
        b'no-automv',
        None,
        _(b'disable automatic file move detection'),
    )
    wrapped[1].append(noautomv_flag)


def mvcheck(orig, ui, repo, *pats, **opts):
    """Detect unrecorded moves, record them, then run the wrapped command.

    ``no_automv`` is removed from ``opts`` before anything else so it is
    never forwarded to ``orig``.  Unless that flag is set, the
    ``automv.similarity`` setting is read; values outside 0..100 abort, and
    a value of 0 skips detection entirely.  Otherwise the added/removed
    files selected by ``pats``/``opts`` are compared and any detected
    renames are marked in the working copy.

    The whole sequence, including the call to ``orig``, runs while holding
    ``repo.wlock()``.  Returns whatever ``orig`` returns.
    """
    skip = opts.pop('no_automv', False)
    with repo.wlock():
        # ``None`` means "detection did not run"; an empty dict means it
        # ran and found nothing.  Only the former skips the marking step.
        detected = None
        if not skip:
            percent = ui.configint(b'automv', b'similarity')
            if percent < 0 or percent > 100:
                raise error.Abort(
                    _(b'automv.similarity must be between 0 and 100')
                )
            if percent > 0:
                matcher = scmutil.match(
                    repo[None], pats, pycompat.byteskwargs(opts)
                )
                newfiles, gonefiles = _interestingfiles(repo, matcher)
                pathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
                # The config value is a percentage; the rename finder is
                # handed a 0..1 fraction.
                detected = _findrenames(
                    repo, pathfn, newfiles, gonefiles, percent / 100.0
                )

        if detected is not None:
            with repo.dirstate.changing_files(repo):
                # XXX this should be wider and integrated with the commit
                # transaction. At the same time as we do the `addremove` logic
                # for commit.  However we can't really do better with the
                # current extension structure, and this is not worse than what
                # happened before.
                scmutil._markchanges(repo, (), (), detected)
        return orig(ui, repo, *pats, **opts)


def _interestingfiles(repo, matcher):
    """Collect the added and removed files selected by ``matcher``.

    Returns an ``(added, removed)`` pair taken from ``repo.status``.  Files
    that already carry copy information between ``.`` and the working
    context are dropped from ``added``; ``removed`` is passed through as is.

    """
    status = repo.status(match=matcher)
    candidates, removed = status.added, status.removed

    # Anything present in this mapping already has copy info recorded, so
    # there is nothing left to detect for it.
    known = copies.pathcopies(repo[b'.'], repo[None], matcher)
    unrecorded = [name for name in candidates if name not in known]

    return unrecorded, removed


def _findrenames(repo, uipathfn, added, removed, similarity):
    """Map each added file that looks like a move to its removed source.

    ``similar.findrenames`` is asked for (source, destination, score)
    matches between ``removed`` and ``added`` at the given ``similarity``.
    The result is a dict of ``{destination: source}``.  A similarity that
    is not greater than zero yields an empty dict without any comparison.

    In verbose mode every match is reported individually; a summary count
    is printed whenever at least one rename was found.

    """
    moved = {}
    if not similarity > 0:
        return moved

    for source, target, ratio in similar.findrenames(
        repo, added, removed, similarity
    ):
        if repo.ui.verbose:
            template = _(b'detected move of %s as %s (%d%% similar)\n')
            # The score is scaled by 100 for display as a percentage.
            repo.ui.status(
                template % (uipathfn(source), uipathfn(target), ratio * 100)
            )
        moved[target] = source

    if moved:
        repo.ui.status(_(b'detected move of %d files\n') % len(moved))
    return moved