diff mercurial/upgrade_utils/actions.py @ 46047:4b89cf08d8dc

upgrade: split definition and management of the actions from the main code. This is a second step to clarify and clean up this code. The code responsible for defining which actions exist, which are possible, and their compatibility is moved into a sub module. This clarifies the main code and prepares further cleanup. Differential Revision: https://phab.mercurial-scm.org/D9477
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 01 Dec 2020 15:11:06 +0100
parents mercurial/upgrade.py@f105c49e89cd
children f4f956342cf1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/upgrade_utils/actions.py	Tue Dec 01 15:11:06 2020 +0100
@@ -0,0 +1,666 @@
+# upgrade.py - functions for in place upgrade of Mercurial repository
+#
+# Copyright (c) 2016-present, Gregory Szorc
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from ..i18n import _
+from .. import (
+    localrepo,
+    requirements,
+    util,
+)
+
+from ..utils import compression
+
+# Set of requirements whose addition or removal requests a clone of all
+# revlogs.
+RECLONES_REQUIREMENTS = {
+    b'generaldelta',
+    requirements.SPARSEREVLOG_REQUIREMENT,
+}
+
+
+def requiredsourcerequirements(repo):
+    """Return requirements the source repository must already have.
+
+    A repository lacking any requirement of the returned set cannot be
+    upgraded. ``repo`` is not consulted by this implementation.
+    """
+    # Both requirements were introduced in Mercurial 0.9.2.
+    mandatory = set()
+    mandatory.add(b'revlogv1')
+    mandatory.add(b'store')
+    return mandatory
+
+
+def blocksourcerequirements(repo):
+    """Return requirements whose presence in the source forbids an upgrade.
+
+    If the source repository carries any requirement of the returned set,
+    the upgrade cannot be performed. ``repo`` is not consulted by this
+    implementation.
+    """
+    # Experimental feature the upgrade code does not support yet; this is
+    # an artificial limitation.
+    blockers = {requirements.TREEMANIFEST_REQUIREMENT}
+    # Precursor to generaldelta that was never enabled by default; it
+    # should (hopefully) not exist in the wild.
+    blockers.add(b'parentdelta')
+    # Upgrade should operate on the actual store, not the shared link.
+    blockers.add(requirements.SHARED_REQUIREMENT)
+    return blockers
+
+
+def supportremovedrequirements(repo):
+    """Return the requirements an upgrade is allowed to drop.
+
+    A requirement that the upgraded repository would no longer have must be
+    a member of the returned set, otherwise the upgrade is not allowed.
+    """
+    removable = {
+        requirements.SPARSEREVLOG_REQUIREMENT,
+        requirements.SIDEDATA_REQUIREMENT,
+        requirements.COPIESSDC_REQUIREMENT,
+        requirements.NODEMAP_REQUIREMENT,
+        requirements.SHARESAFE_REQUIREMENT,
+    }
+    engines = compression.compengines
+    for enginename in engines:
+        engine = engines[enginename]
+        # only available engines declaring a revlog header contribute a
+        # compression requirement
+        if not (engine.available() and engine.revlogheader()):
+            continue
+        removable.add(b'exp-compression-%s' % enginename)
+        if engine.name() == b'zstd':
+            removable.add(b'revlog-compression-zstd')
+    return removable
+
+
+def supporteddestrequirements(repo):
+    """Return the requirements the upgrade supports in its destination.
+
+    An upgrade whose result would carry a requirement outside of the
+    returned set is disallowed.
+
+    Extensions should monkeypatch this to add their custom requirements.
+    """
+    accepted = {
+        b'dotencode',
+        b'fncache',
+        b'generaldelta',
+        b'revlogv1',
+        b'store',
+        requirements.SPARSEREVLOG_REQUIREMENT,
+        requirements.SIDEDATA_REQUIREMENT,
+        requirements.COPIESSDC_REQUIREMENT,
+        requirements.NODEMAP_REQUIREMENT,
+        requirements.SHARESAFE_REQUIREMENT,
+    }
+    for enginename in compression.compengines:
+        engine = compression.compengines[enginename]
+        usable = engine.available() and engine.revlogheader()
+        if not usable:
+            # engine cannot be used for revlogs: no requirement for it
+            continue
+        accepted.add(b'exp-compression-%s' % enginename)
+        if engine.name() == b'zstd':
+            accepted.add(b'revlog-compression-zstd')
+    return accepted
+
+
+def allowednewrequirements(repo):
+    """Return the requirements an upgrade may add to a repository.
+
+    A proposed requirement that was not present before the upgrade is only
+    accepted when it belongs to the returned set.
+
+    An explicit list of allowed additions is used, rather than a list of
+    known bad ones, because it is safer: future, unknown requirements
+    cannot be added by accident.
+    """
+    addable = {
+        b'dotencode',
+        b'fncache',
+        b'generaldelta',
+        requirements.SPARSEREVLOG_REQUIREMENT,
+        requirements.SIDEDATA_REQUIREMENT,
+        requirements.COPIESSDC_REQUIREMENT,
+        requirements.NODEMAP_REQUIREMENT,
+        requirements.SHARESAFE_REQUIREMENT,
+    }
+    for enginename in compression.compengines:
+        engine = compression.compengines[enginename]
+        if engine.available() and engine.revlogheader():
+            newreqs = [b'exp-compression-%s' % enginename]
+            if engine.name() == b'zstd':
+                newreqs.append(b'revlog-compression-zstd')
+            addable.update(newreqs)
+    return addable
+
+
+def preservedrequirements(repo):
+    """Return an empty set; ``repo`` is not consulted.
+
+    NOTE(review): judging by the name, this is presumably the set of
+    requirements an upgrade carries over unchanged -- confirm against the
+    callers.
+    """
+    return set()
+
+
+# Possible values for the ``type`` attribute of an ``improvement``.
+DEFICIENCY = b'deficiency'
+OPTIMISATION = b'optimization'
+
+
+class improvement(object):
+    """An improvement that an upgrade is able to apply to a repository.
+
+    Instances carry the following attributes:
+
+    name
+       Machine-readable string uniquely identifying the improvement. It is
+       mapped to an action later in the upgrade process.
+
+    type
+       ``DEFICIENCY`` or ``OPTIMISATION``. A deficiency is an obvious
+       problem; an optimization is a (sometimes optional) action that can
+       further improve the state of the repository.
+
+    description
+       Human-oriented message describing the improvement and its
+       implications. Worded in the present tense for ``DEFICIENCY`` and in
+       the future tense for ``OPTIMISATION``.
+
+    upgrademessage
+       Human-oriented message describing what an upgrade addressing the
+       issue will do. Worded in the future tense.
+
+    Equality and hashing only take ``name`` into account.
+    """
+
+    def __init__(self, name, type, description, upgrademessage):
+        self.name, self.type = name, type
+        self.description, self.upgrademessage = description, upgrademessage
+
+    def __eq__(self, other):
+        if isinstance(other, improvement):
+            return self.name == other.name
+        # not comparable: let Python try the reflected operation
+        return NotImplemented
+
+    def __ne__(self, other):
+        equal = self == other
+        return not equal
+
+    def __hash__(self):
+        return hash(self.name)
+
+
+# every class registered through ``registerformatvariant``, in registration
+# order
+allformatvariant = []
+
+
+def registerformatvariant(cls):
+    """Class decorator appending ``cls`` to ``allformatvariant``.
+
+    ``cls`` is returned unchanged so the decorated name stays bound to it.
+    """
+    allformatvariant.append(cls)
+    return cls
+
+
+class formatvariant(improvement):
+    """an improvement subclass dedicated to repository format
+
+    Format variants are used as classes and are not meant to be
+    instantiated: ``__init__`` raises ``NotImplementedError`` and the value
+    accessors (``fromrepo``, ``fromconfig``) do not take an instance.
+    """
+
+    type = DEFICIENCY
+    ### The following attributes should be defined for each class:
+
+    # machine-readable string uniquely identifying this improvement. It will
+    # be mapped to an action later in the upgrade process.
+    name = None
+
+    # message intended for humans explaining the improvement in more detail,
+    # including the implications of it. For ``DEFICIENCY`` types, should be
+    # worded in the present tense.
+    description = None
+
+    # message intended for humans explaining what an upgrade addressing this
+    # issue will do. Should be worded in the future tense.
+    upgrademessage = None
+
+    # value of current Mercurial default for new repository
+    default = None
+
+    def __init__(self):
+        # this base class refuses instantiation
+        raise NotImplementedError()
+
+    @staticmethod
+    def fromrepo(repo):
+        """current value of the variant in the repository"""
+        raise NotImplementedError()
+
+    @staticmethod
+    def fromconfig(repo):
+        """current value of the variant in the configuration"""
+        raise NotImplementedError()
+
+
+class requirementformatvariant(formatvariant):
+    """A format variant toggled by a single repository requirement.
+
+    Many format variants are controlled by one requirement; this small
+    subclass factors out the shared lookup code. Subclasses set
+    ``_requirement``.
+    """
+
+    # the requirement controlling this format variant
+    _requirement = None
+
+    @staticmethod
+    def _newreporequirements(ui):
+        # requirements computed by localrepo for the default creation
+        # options of ``ui``
+        createopts = localrepo.defaultcreateopts(ui)
+        return localrepo.newreporequirements(ui, createopts)
+
+    @classmethod
+    def fromrepo(cls, repo):
+        """whether the requirement is present in ``repo.requirements``"""
+        requirement = cls._requirement
+        assert requirement is not None
+        return requirement in repo.requirements
+
+    @classmethod
+    def fromconfig(cls, repo):
+        """whether the requirement is in ``_newreporequirements(repo.ui)``"""
+        requirement = cls._requirement
+        assert requirement is not None
+        return requirement in cls._newreporequirements(repo.ui)
+
+
+@registerformatvariant
+class fncache(requirementformatvariant):
+    """format variant controlled by the ``fncache`` requirement"""
+
+    name = b'fncache'
+
+    _requirement = b'fncache'
+
+    default = True
+
+    description = _(
+        b'long and reserved filenames may not work correctly; '
+        b'repository performance is sub-optimal'
+    )
+
+    upgrademessage = _(
+        b'repository will be more resilient to storing '
+        b'certain paths and performance of certain '
+        b'operations should be improved'
+    )
+
+
+@registerformatvariant
+class dotencode(requirementformatvariant):
+    """format variant controlled by the ``dotencode`` requirement"""
+
+    name = b'dotencode'
+
+    _requirement = b'dotencode'
+
+    default = True
+
+    description = _(
+        b'storage of filenames beginning with a period or '
+        b'space may not work correctly'
+    )
+
+    upgrademessage = _(
+        b'repository will be better able to store files '
+        b'beginning with a space or period'
+    )
+
+
+@registerformatvariant
+class generaldelta(requirementformatvariant):
+    """format variant controlled by the ``generaldelta`` requirement"""
+
+    name = b'generaldelta'
+
+    _requirement = b'generaldelta'
+
+    default = True
+
+    description = _(
+        b'deltas within internal storage are unable to '
+        b'choose optimal revisions; repository is larger and '
+        b'slower than it could be; interaction with other '
+        b'repositories may require extra network and CPU '
+        b'resources, making "hg push" and "hg pull" slower'
+    )
+
+    upgrademessage = _(
+        b'repository storage will be able to create '
+        b'optimal deltas; new repository data will be '
+        b'smaller and read times should decrease; '
+        b'interacting with other repositories using this '
+        b'storage model should require less network and '
+        b'CPU resources, making "hg push" and "hg pull" '
+        b'faster'
+    )
+
+
+@registerformatvariant
+class sharesafe(requirementformatvariant):
+    """format variant controlled by ``requirements.SHARESAFE_REQUIREMENT``"""
+
+    name = b'exp-sharesafe'
+    _requirement = requirements.SHARESAFE_REQUIREMENT
+
+    default = False
+
+    description = _(
+        b'old shared repositories do not share source repository '
+        b'requirements and config. This leads to various problems '
+        b'when the source repository format is upgraded or some new '
+        b'extensions are enabled.'
+    )
+
+    upgrademessage = _(
+        b'Upgrades a repository to share-safe format so that future '
+        b'shares of this repository share its requirements and configs.'
+    )
+
+
+@registerformatvariant
+class sparserevlog(requirementformatvariant):
+    """format variant controlled by ``requirements.SPARSEREVLOG_REQUIREMENT``"""
+
+    name = b'sparserevlog'
+
+    _requirement = requirements.SPARSEREVLOG_REQUIREMENT
+
+    default = True
+
+    description = _(
+        b'in order to limit disk reading and memory usage on older '
+        b'version, the span of a delta chain from its root to its '
+        b'end is limited, whatever the relevant data in this span. '
+        b'This can severly limit Mercurial ability to build good '
+        b'chain of delta resulting is much more storage space being '
+        b'taken and limit reusability of on disk delta during '
+        b'exchange.'
+    )
+
+    upgrademessage = _(
+        b'Revlog supports delta chain with more unused data '
+        b'between payload. These gaps will be skipped at read '
+        b'time. This allows for better delta chains, making a '
+        b'better compression and faster exchange with server.'
+    )
+
+
+@registerformatvariant
+class sidedata(requirementformatvariant):
+    """format variant controlled by ``requirements.SIDEDATA_REQUIREMENT``"""
+
+    name = b'sidedata'
+
+    _requirement = requirements.SIDEDATA_REQUIREMENT
+
+    default = False
+
+    description = _(
+        b'Allows storage of extra data alongside a revision, '
+        b'unlocking various caching options.'
+    )
+
+    upgrademessage = _(b'Allows storage of extra data alongside a revision.')
+
+
+@registerformatvariant
+class persistentnodemap(requirementformatvariant):
+    """format variant controlled by ``requirements.NODEMAP_REQUIREMENT``"""
+
+    name = b'persistent-nodemap'
+
+    _requirement = requirements.NODEMAP_REQUIREMENT
+
+    default = False
+
+    description = _(
+        b'persist the node -> rev mapping on disk to speedup lookup'
+    )
+
+    upgrademessage = _(b'Speedup revision lookup by node id.')
+
+
+@registerformatvariant
+class copiessdc(requirementformatvariant):
+    """format variant controlled by ``requirements.COPIESSDC_REQUIREMENT``"""
+
+    name = b'copies-sdc'
+
+    _requirement = requirements.COPIESSDC_REQUIREMENT
+
+    default = False
+
+    description = _(b'Stores copies information alongside changesets.')
+
+    upgrademessage = _(
+        b'Allows to use more efficient algorithm to deal with ' b'copy tracing.'
+    )
+
+
+@registerformatvariant
+class removecldeltachain(formatvariant):
+    """format variant reporting whether the changelog is free of deltas"""
+
+    name = b'plain-cl-delta'
+    default = True
+
+    description = _(
+        b'changelog storage is using deltas instead of '
+        b'raw entries; changelog reading and any '
+        b'operation relying on changelog data are slower '
+        b'than they could be'
+    )
+
+    upgrademessage = _(
+        b'changelog storage will be reformated to '
+        b'store raw entries; changelog reading will be '
+        b'faster; changelog size may be reduced'
+    )
+
+    @staticmethod
+    def fromrepo(repo):
+        """True when every changelog revision is its own chain base"""
+        # Changelogs stopped using delta chains with Mercurial 4.0; look
+        # for any revision whose chain base is another revision.
+        changelog = repo.changelog
+        basefor = changelog.chainbase
+        for rev in changelog:
+            if rev != basefor(rev):
+                return False
+        return True
+
+    @staticmethod
+    def fromconfig(repo):
+        """always True, whatever the configuration of ``repo``"""
+        return True
+
+
+@registerformatvariant
+class compressionengine(formatvariant):
+    """format variant reporting the revlog compression engine in use"""
+
+    name = b'compression'
+    default = b'zlib'
+
+    description = _(
+        b'Compresion algorithm used to compress data. '
+        b'Some engine are faster than other'
+    )
+
+    upgrademessage = _(
+        b'revlog content will be recompressed with the new algorithm.'
+    )
+
+    @classmethod
+    def fromrepo(cls, repo):
+        """engine named by the compression requirement, ``zlib`` if none"""
+        # Several compression requirements may co-exist since, strictly
+        # speaking, revlog seems to support mixed compression styles.
+        #
+        # The engine reported is the one named by the last matching
+        # requirement met while iterating.
+        # NOTE(review): "last" depends on the iteration order of
+        # ``repo.requirements`` -- confirm that order is meaningful.
+        prefixes = (b'revlog-compression-', b'exp-compression-')
+        engine = b'zlib'
+        for req in repo.requirements:
+            if req.startswith(prefixes):
+                # keep everything after the second dash
+                engine = req.split(b'-', 2)[2]
+        return engine
+
+    @classmethod
+    def fromconfig(cls, repo):
+        """first known engine listed in ``format.revlog-compression``"""
+        configured = repo.ui.configlist(b'format', b'revlog-compression')
+        # pick the first valid value, as the selection code would do
+        for candidate in configured:
+            if candidate not in util.compengines:
+                continue
+            return candidate
+
+        # no valid engine configured: display the whole list for clarity
+        return b','.join(configured)
+
+
+@registerformatvariant
+class compressionlevel(formatvariant):
+    """format variant reporting the configured revlog compression level
+
+    Both accessors return ``b'default'`` when no level is configured for the
+    engine in use (or when the engine is neither zlib nor zstd), and the
+    level formatted as decimal bytes otherwise.
+    """
+
+    name = b'compression-level'
+    default = b'default'
+
+    description = _(b'compression level')
+
+    upgrademessage = _(b'revlog content will be recompressed')
+
+    # config option (in the ``storage`` section) holding the level of each
+    # engine that has one
+    _levelconfig = {
+        b'zlib': b'revlog.zlib.level',
+        b'zstd': b'revlog.zstd.level',
+    }
+
+    @classmethod
+    def _levelfor(cls, repo, comp):
+        """format the level configured for compression engine ``comp``"""
+        option = cls._levelconfig.get(comp)
+        level = None
+        if option is not None:
+            level = repo.ui.configint(b'storage', option)
+        if level is None:
+            return b'default'
+        # ``bytes(level)`` must not be used here: on Python 3 it builds
+        # ``level`` NUL bytes instead of the decimal representation.
+        return b'%d' % level
+
+    @classmethod
+    def fromrepo(cls, repo):
+        """level for the compression engine used by the repository"""
+        return cls._levelfor(repo, compressionengine.fromrepo(repo))
+
+    @classmethod
+    def fromconfig(cls, repo):
+        """level for the compression engine selected by the configuration"""
+        return cls._levelfor(repo, compressionengine.fromconfig(repo))
+
+
+def finddeficiencies(repo):
+    """return the registered format variants ``repo`` does not satisfy
+
+    A variant is reported when its ``fromrepo`` value for ``repo`` is falsy.
+    The result follows the order of ``allformatvariant``.
+    """
+    # Lack of revlogv1 and store could be detected here, but both were added
+    # in 0.9.2 and upgrading repositories without them is not supported, so
+    # let's not bother.
+    return [fv for fv in allformatvariant if not fv.fromrepo(repo)]
+
+
+# every object registered through ``register_optimization``, in
+# registration order
+ALL_OPTIMISATIONS = []
+
+
+def register_optimization(obj):
+    """Append ``obj`` to ``ALL_OPTIMISATIONS`` and return it unchanged."""
+    ALL_OPTIMISATIONS.append(obj)
+    return obj
+
+
+# Register the ``re-delta-parent`` optimisation.
+register_optimization(
+    improvement(
+        name=b're-delta-parent',
+        type=OPTIMISATION,
+        description=_(
+            b'deltas within internal storage will be recalculated to '
+            b'choose an optimal base revision where this was not '
+            b'already done; the size of the repository may shrink and '
+            b'various operations may become faster; the first time '
+            b'this optimization is performed could slow down upgrade '
+            b'execution considerably; subsequent invocations should '
+            b'not run noticeably slower'
+        ),
+        upgrademessage=_(
+            b'deltas within internal storage will choose a new '
+            b'base revision if needed'
+        ),
+    )
+)
+
+# Register the ``re-delta-multibase`` optimisation.
+register_optimization(
+    improvement(
+        name=b're-delta-multibase',
+        type=OPTIMISATION,
+        description=_(
+            b'deltas within internal storage will be recalculated '
+            b'against multiple base revision and the smallest '
+            b'difference will be used; the size of the repository may '
+            b'shrink significantly when there are many merges; this '
+            b'optimization will slow down execution in proportion to '
+            b'the number of merges in the repository and the amount '
+            b'of files in the repository; this slow down should not '
+            b'be significant unless there are tens of thousands of '
+            b'files and thousands of merges'
+        ),
+        upgrademessage=_(
+            b'deltas within internal storage will choose an '
+            b'optimal delta by computing deltas against multiple '
+            b'parents; may slow down execution time '
+            b'significantly'
+        ),
+    )
+)
+
+# Register the ``re-delta-all`` optimisation.
+register_optimization(
+    improvement(
+        name=b're-delta-all',
+        type=OPTIMISATION,
+        description=_(
+            b'deltas within internal storage will always be '
+            b'recalculated without reusing prior deltas; this will '
+            b'likely make execution run several times slower; this '
+            b'optimization is typically not needed'
+        ),
+        upgrademessage=_(
+            b'deltas within internal storage will be fully '
+            b'recomputed; this will likely drastically slow down '
+            b'execution time'
+        ),
+    )
+)
+
+# Register the ``re-delta-fulladd`` optimisation.
+register_optimization(
+    improvement(
+        name=b're-delta-fulladd',
+        type=OPTIMISATION,
+        description=_(
+            b'every revision will be re-added as if it was new '
+            b'content. It will go through the full storage '
+            b'mechanism giving extensions a chance to process it '
+            b'(eg. lfs). This is similar to "re-delta-all" but even '
+            b'slower since more logic is involved.'
+        ),
+        upgrademessage=_(
+            b'each revision will be added as new content to the '
+            b'internal storage; this will likely drastically slow '
+            b'down execution time, but some extensions might need '
+            b'it'
+        ),
+    )
+)
+
+
+def findoptimizations(repo):
+    """Return the optimisations that could be used during an upgrade.
+
+    Every registered optimisation is returned unconditionally, as a new
+    list; logic running later figures out which ones to apply. ``repo`` is
+    not consulted.
+    """
+    return ALL_OPTIMISATIONS[:]
+
+
+def determineactions(repo, deficiencies, sourcereqs, destreqs):
+    """Determine upgrade actions that will be performed.
+
+    ``deficiencies`` is an iterable of improvements, such as the list
+    returned by ``finddeficiencies``. An improvement tied to a requirement
+    (a non-``None`` ``_requirement`` attribute) is pruned when that
+    requirement is not part of ``destreqs``. Improvements without a
+    ``_requirement`` attribute, or with a ``None`` one, are always kept.
+
+    ``repo`` and ``sourcereqs`` are currently unused.
+
+    Returns the list of retained improvements, in their original order.
+    """
+    newactions = []
+
+    for d in deficiencies:
+        # Only requirement based variants define ``_requirement``; other
+        # improvements (e.g. ``removecldeltachain``) must not fail with an
+        # AttributeError here.
+        name = getattr(d, '_requirement', None)
+
+        # If the action is a requirement that doesn't show up in the
+        # destination requirements, prune the action.
+        if name is not None and name not in destreqs:
+            continue
+
+        newactions.append(d)
+
+    # FUTURE consider adding some optimizations here for certain transitions.
+    # e.g. adding generaldelta could schedule parent redeltas.
+
+    return newactions