Mercurial > hg
diff mercurial/upgrade_utils/actions.py @ 46047:4b89cf08d8dc
upgrade: split definition and management of the actions from the main code
This is a second step to clarify and clean up this code. The code responsible
for definition which action exist, are possible and their compatibility if moved
into a sub module.
This clarify the main code and prepare further cleanup.
Differential Revision: https://phab.mercurial-scm.org/D9477
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 01 Dec 2020 15:11:06 +0100 |
parents | mercurial/upgrade.py@f105c49e89cd |
children | f4f956342cf1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/upgrade_utils/actions.py Tue Dec 01 15:11:06 2020 +0100 @@ -0,0 +1,666 @@ +# upgrade.py - functions for in place upgrade of Mercurial repository +# +# Copyright (c) 2016-present, Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +from ..i18n import _ +from .. import ( + localrepo, + requirements, + util, +) + +from ..utils import compression + +# list of requirements that request a clone of all revlog if added/removed +RECLONES_REQUIREMENTS = { + b'generaldelta', + requirements.SPARSEREVLOG_REQUIREMENT, +} + + +def requiredsourcerequirements(repo): + """Obtain requirements required to be present to upgrade a repo. + + An upgrade will not be allowed if the repository doesn't have the + requirements returned by this function. + """ + return { + # Introduced in Mercurial 0.9.2. + b'revlogv1', + # Introduced in Mercurial 0.9.2. + b'store', + } + + +def blocksourcerequirements(repo): + """Obtain requirements that will prevent an upgrade from occurring. + + An upgrade cannot be performed if the source repository contains a + requirements in the returned set. + """ + return { + # The upgrade code does not yet support these experimental features. + # This is an artificial limitation. + requirements.TREEMANIFEST_REQUIREMENT, + # This was a precursor to generaldelta and was never enabled by default. + # It should (hopefully) not exist in the wild. + b'parentdelta', + # Upgrade should operate on the actual store, not the shared link. + requirements.SHARED_REQUIREMENT, + } + + +def supportremovedrequirements(repo): + """Obtain requirements that can be removed during an upgrade. + + If an upgrade were to create a repository that dropped a requirement, + the dropped requirement must appear in the returned set for the upgrade + to be allowed. + """ + supported = { + requirements.SPARSEREVLOG_REQUIREMENT, + requirements.SIDEDATA_REQUIREMENT, + requirements.COPIESSDC_REQUIREMENT, + requirements.NODEMAP_REQUIREMENT, + requirements.SHARESAFE_REQUIREMENT, + } + for name in compression.compengines: + engine = compression.compengines[name] + if engine.available() and engine.revlogheader(): + supported.add(b'exp-compression-%s' % name) + if engine.name() == b'zstd': + supported.add(b'revlog-compression-zstd') + return supported + + +def supporteddestrequirements(repo): + """Obtain requirements that upgrade supports in the destination. + + If the result of the upgrade would create requirements not in this set, + the upgrade is disallowed. + + Extensions should monkeypatch this to add their custom requirements. + """ + supported = { + b'dotencode', + b'fncache', + b'generaldelta', + b'revlogv1', + b'store', + requirements.SPARSEREVLOG_REQUIREMENT, + requirements.SIDEDATA_REQUIREMENT, + requirements.COPIESSDC_REQUIREMENT, + requirements.NODEMAP_REQUIREMENT, + requirements.SHARESAFE_REQUIREMENT, + } + for name in compression.compengines: + engine = compression.compengines[name] + if engine.available() and engine.revlogheader(): + supported.add(b'exp-compression-%s' % name) + if engine.name() == b'zstd': + supported.add(b'revlog-compression-zstd') + return supported + + +def allowednewrequirements(repo): + """Obtain requirements that can be added to a repository during upgrade. + + This is used to disallow proposed requirements from being added when + they weren't present before. + + We use a list of allowed requirement additions instead of a list of known + bad additions because the whitelist approach is safer and will prevent + future, unknown requirements from accidentally being added. + """ + supported = { + b'dotencode', + b'fncache', + b'generaldelta', + requirements.SPARSEREVLOG_REQUIREMENT, + requirements.SIDEDATA_REQUIREMENT, + requirements.COPIESSDC_REQUIREMENT, + requirements.NODEMAP_REQUIREMENT, + requirements.SHARESAFE_REQUIREMENT, + } + for name in compression.compengines: + engine = compression.compengines[name] + if engine.available() and engine.revlogheader(): + supported.add(b'exp-compression-%s' % name) + if engine.name() == b'zstd': + supported.add(b'revlog-compression-zstd') + return supported + + +def preservedrequirements(repo): + return set() + + +DEFICIENCY = b'deficiency' +OPTIMISATION = b'optimization' + + +class improvement(object): + """Represents an improvement that can be made as part of an upgrade. + + The following attributes are defined on each instance: + + name + Machine-readable string uniquely identifying this improvement. It + will be mapped to an action later in the upgrade process. + + type + Either ``DEFICIENCY`` or ``OPTIMISATION``. A deficiency is an obvious + problem. An optimization is an action (sometimes optional) that + can be taken to further improve the state of the repository. + + description + Message intended for humans explaining the improvement in more detail, + including the implications of it. For ``DEFICIENCY`` types, should be + worded in the present tense. For ``OPTIMISATION`` types, should be + worded in the future tense. + + upgrademessage + Message intended for humans explaining what an upgrade addressing this + issue will do. Should be worded in the future tense. + """ + + def __init__(self, name, type, description, upgrademessage): + self.name = name + self.type = type + self.description = description + self.upgrademessage = upgrademessage + + def __eq__(self, other): + if not isinstance(other, improvement): + # This is what python tell use to do + return NotImplemented + return self.name == other.name + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash(self.name) + + +allformatvariant = [] + + +def registerformatvariant(cls): + allformatvariant.append(cls) + return cls + + +class formatvariant(improvement): + """an improvement subclass dedicated to repository format""" + + type = DEFICIENCY + ### The following attributes should be defined for each class: + + # machine-readable string uniquely identifying this improvement. it will be + # mapped to an action later in the upgrade process. + name = None + + # message intended for humans explaining the improvement in more detail, + # including the implications of it ``DEFICIENCY`` types, should be worded + # in the present tense. + description = None + + # message intended for humans explaining what an upgrade addressing this + # issue will do. should be worded in the future tense. + upgrademessage = None + + # value of current Mercurial default for new repository + default = None + + def __init__(self): + raise NotImplementedError() + + @staticmethod + def fromrepo(repo): + """current value of the variant in the repository""" + raise NotImplementedError() + + @staticmethod + def fromconfig(repo): + """current value of the variant in the configuration""" + raise NotImplementedError() + + +class requirementformatvariant(formatvariant): + """formatvariant based on a 'requirement' name. + + Many format variant are controlled by a 'requirement'. We define a small + subclass to factor the code. + """ + + # the requirement that control this format variant + _requirement = None + + @staticmethod + def _newreporequirements(ui): + return localrepo.newreporequirements( + ui, localrepo.defaultcreateopts(ui) + ) + + @classmethod + def fromrepo(cls, repo): + assert cls._requirement is not None + return cls._requirement in repo.requirements + + @classmethod + def fromconfig(cls, repo): + assert cls._requirement is not None + return cls._requirement in cls._newreporequirements(repo.ui) + + +@registerformatvariant +class fncache(requirementformatvariant): + name = b'fncache' + + _requirement = b'fncache' + + default = True + + description = _( + b'long and reserved filenames may not work correctly; ' + b'repository performance is sub-optimal' + ) + + upgrademessage = _( + b'repository will be more resilient to storing ' + b'certain paths and performance of certain ' + b'operations should be improved' + ) + + +@registerformatvariant +class dotencode(requirementformatvariant): + name = b'dotencode' + + _requirement = b'dotencode' + + default = True + + description = _( + b'storage of filenames beginning with a period or ' + b'space may not work correctly' + ) + + upgrademessage = _( + b'repository will be better able to store files ' + b'beginning with a space or period' + ) + + +@registerformatvariant +class generaldelta(requirementformatvariant): + name = b'generaldelta' + + _requirement = b'generaldelta' + + default = True + + description = _( + b'deltas within internal storage are unable to ' + b'choose optimal revisions; repository is larger and ' + b'slower than it could be; interaction with other ' + b'repositories may require extra network and CPU ' + b'resources, making "hg push" and "hg pull" slower' + ) + + upgrademessage = _( + b'repository storage will be able to create ' + b'optimal deltas; new repository data will be ' + b'smaller and read times should decrease; ' + b'interacting with other repositories using this ' + b'storage model should require less network and ' + b'CPU resources, making "hg push" and "hg pull" ' + b'faster' + ) + + +@registerformatvariant +class sharesafe(requirementformatvariant): + name = b'exp-sharesafe' + _requirement = requirements.SHARESAFE_REQUIREMENT + + default = False + + description = _( + b'old shared repositories do not share source repository ' + b'requirements and config. This leads to various problems ' + b'when the source repository format is upgraded or some new ' + b'extensions are enabled.' + ) + + upgrademessage = _( + b'Upgrades a repository to share-safe format so that future ' + b'shares of this repository share its requirements and configs.' + ) + + +@registerformatvariant +class sparserevlog(requirementformatvariant): + name = b'sparserevlog' + + _requirement = requirements.SPARSEREVLOG_REQUIREMENT + + default = True + + description = _( + b'in order to limit disk reading and memory usage on older ' + b'version, the span of a delta chain from its root to its ' + b'end is limited, whatever the relevant data in this span. ' + b'This can severly limit Mercurial ability to build good ' + b'chain of delta resulting is much more storage space being ' + b'taken and limit reusability of on disk delta during ' + b'exchange.' + ) + + upgrademessage = _( + b'Revlog supports delta chain with more unused data ' + b'between payload. These gaps will be skipped at read ' + b'time. This allows for better delta chains, making a ' + b'better compression and faster exchange with server.' + ) + + +@registerformatvariant +class sidedata(requirementformatvariant): + name = b'sidedata' + + _requirement = requirements.SIDEDATA_REQUIREMENT + + default = False + + description = _( + b'Allows storage of extra data alongside a revision, ' + b'unlocking various caching options.' + ) + + upgrademessage = _(b'Allows storage of extra data alongside a revision.') + + +@registerformatvariant +class persistentnodemap(requirementformatvariant): + name = b'persistent-nodemap' + + _requirement = requirements.NODEMAP_REQUIREMENT + + default = False + + description = _( + b'persist the node -> rev mapping on disk to speedup lookup' + ) + + upgrademessage = _(b'Speedup revision lookup by node id.') + + +@registerformatvariant +class copiessdc(requirementformatvariant): + name = b'copies-sdc' + + _requirement = requirements.COPIESSDC_REQUIREMENT + + default = False + + description = _(b'Stores copies information alongside changesets.') + + upgrademessage = _( + b'Allows to use more efficient algorithm to deal with ' b'copy tracing.' + ) + + +@registerformatvariant +class removecldeltachain(formatvariant): + name = b'plain-cl-delta' + + default = True + + description = _( + b'changelog storage is using deltas instead of ' + b'raw entries; changelog reading and any ' + b'operation relying on changelog data are slower ' + b'than they could be' + ) + + upgrademessage = _( + b'changelog storage will be reformated to ' + b'store raw entries; changelog reading will be ' + b'faster; changelog size may be reduced' + ) + + @staticmethod + def fromrepo(repo): + # Mercurial 4.0 changed changelogs to not use delta chains. Search for + # changelogs with deltas. + cl = repo.changelog + chainbase = cl.chainbase + return all(rev == chainbase(rev) for rev in cl) + + @staticmethod + def fromconfig(repo): + return True + + +@registerformatvariant +class compressionengine(formatvariant): + name = b'compression' + default = b'zlib' + + description = _( + b'Compresion algorithm used to compress data. ' + b'Some engine are faster than other' + ) + + upgrademessage = _( + b'revlog content will be recompressed with the new algorithm.' + ) + + @classmethod + def fromrepo(cls, repo): + # we allow multiple compression engine requirement to co-exist because + # strickly speaking, revlog seems to support mixed compression style. + # + # The compression used for new entries will be "the last one" + compression = b'zlib' + for req in repo.requirements: + prefix = req.startswith + if prefix(b'revlog-compression-') or prefix(b'exp-compression-'): + compression = req.split(b'-', 2)[2] + return compression + + @classmethod + def fromconfig(cls, repo): + compengines = repo.ui.configlist(b'format', b'revlog-compression') + # return the first valid value as the selection code would do + for comp in compengines: + if comp in util.compengines: + return comp + + # no valide compression found lets display it all for clarity + return b','.join(compengines) + + +@registerformatvariant +class compressionlevel(formatvariant): + name = b'compression-level' + default = b'default' + + description = _(b'compression level') + + upgrademessage = _(b'revlog content will be recompressed') + + @classmethod + def fromrepo(cls, repo): + comp = compressionengine.fromrepo(repo) + level = None + if comp == b'zlib': + level = repo.ui.configint(b'storage', b'revlog.zlib.level') + elif comp == b'zstd': + level = repo.ui.configint(b'storage', b'revlog.zstd.level') + if level is None: + return b'default' + return bytes(level) + + @classmethod + def fromconfig(cls, repo): + comp = compressionengine.fromconfig(repo) + level = None + if comp == b'zlib': + level = repo.ui.configint(b'storage', b'revlog.zlib.level') + elif comp == b'zstd': + level = repo.ui.configint(b'storage', b'revlog.zstd.level') + if level is None: + return b'default' + return bytes(level) + + +def finddeficiencies(repo): + """returns a list of deficiencies that the repo suffer from""" + deficiencies = [] + + # We could detect lack of revlogv1 and store here, but they were added + # in 0.9.2 and we don't support upgrading repos without these + # requirements, so let's not bother. + + for fv in allformatvariant: + if not fv.fromrepo(repo): + deficiencies.append(fv) + + return deficiencies + + +ALL_OPTIMISATIONS = [] + + +def register_optimization(obj): + ALL_OPTIMISATIONS.append(obj) + return obj + + +register_optimization( + improvement( + name=b're-delta-parent', + type=OPTIMISATION, + description=_( + b'deltas within internal storage will be recalculated to ' + b'choose an optimal base revision where this was not ' + b'already done; the size of the repository may shrink and ' + b'various operations may become faster; the first time ' + b'this optimization is performed could slow down upgrade ' + b'execution considerably; subsequent invocations should ' + b'not run noticeably slower' + ), + upgrademessage=_( + b'deltas within internal storage will choose a new ' + b'base revision if needed' + ), + ) +) + +register_optimization( + improvement( + name=b're-delta-multibase', + type=OPTIMISATION, + description=_( + b'deltas within internal storage will be recalculated ' + b'against multiple base revision and the smallest ' + b'difference will be used; the size of the repository may ' + b'shrink significantly when there are many merges; this ' + b'optimization will slow down execution in proportion to ' + b'the number of merges in the repository and the amount ' + b'of files in the repository; this slow down should not ' + b'be significant unless there are tens of thousands of ' + b'files and thousands of merges' + ), + upgrademessage=_( + b'deltas within internal storage will choose an ' + b'optimal delta by computing deltas against multiple ' + b'parents; may slow down execution time ' + b'significantly' + ), + ) +) + +register_optimization( + improvement( + name=b're-delta-all', + type=OPTIMISATION, + description=_( + b'deltas within internal storage will always be ' + b'recalculated without reusing prior deltas; this will ' + b'likely make execution run several times slower; this ' + b'optimization is typically not needed' + ), + upgrademessage=_( + b'deltas within internal storage will be fully ' + b'recomputed; this will likely drastically slow down ' + b'execution time' + ), + ) +) + +register_optimization( + improvement( + name=b're-delta-fulladd', + type=OPTIMISATION, + description=_( + b'every revision will be re-added as if it was new ' + b'content. It will go through the full storage ' + b'mechanism giving extensions a chance to process it ' + b'(eg. lfs). This is similar to "re-delta-all" but even ' + b'slower since more logic is involved.' + ), + upgrademessage=_( + b'each revision will be added as new content to the ' + b'internal storage; this will likely drastically slow ' + b'down execution time, but some extensions might need ' + b'it' + ), + ) +) + + +def findoptimizations(repo): + """Determine optimisation that could be used during upgrade""" + # These are unconditionally added. There is logic later that figures out + # which ones to apply. + return list(ALL_OPTIMISATIONS) + + +def determineactions(repo, deficiencies, sourcereqs, destreqs): + """Determine upgrade actions that will be performed. + + Given a list of improvements as returned by ``finddeficiencies`` and + ``findoptimizations``, determine the list of upgrade actions that + will be performed. + + The role of this function is to filter improvements if needed, apply + recommended optimizations from the improvements list that make sense, + etc. + + Returns a list of action names. + """ + newactions = [] + + for d in deficiencies: + name = d._requirement + + # If the action is a requirement that doesn't show up in the + # destination requirements, prune the action. + if name is not None and name not in destreqs: + continue + + newactions.append(d) + + # FUTURE consider adding some optimizations here for certain transitions. + # e.g. adding generaldelta could schedule parent redeltas. + + return newactions