changeset 51029:498afb627f78

revlog: move configuration attribute into dedicated object First, this makes things clearer: the number of configuration attributes has grown out of control, so gathering them into dedicated objects helps. Second, this will make sharing them at different levels simpler in the future. Third, it will allow us to make initialisation and temporary overrides simpler soon.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 10 Oct 2023 10:02:05 +0200
parents f70ce1aedbcb
children 177e7d6bf875
files mercurial/bundlerepo.py mercurial/changelog.py mercurial/manifest.py mercurial/revlog.py mercurial/revlogutils/rewrite.py mercurial/unionrepo.py tests/test-revlog-raw.py
diffstat 7 files changed, 278 insertions(+), 73 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/bundlerepo.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/mercurial/bundlerepo.py	Tue Oct 10 10:02:05 2023 +0200
@@ -235,6 +235,15 @@
             )
         return super(bundlemanifest, self).dirlog(d)
 
+    # XXX small hack to work around the use of manifest.manifestrevlog
+    @property
+    def _generaldelta(self):
+        return self._revlog.delta_config.general_delta
+
+    @_generaldelta.setter
+    def _generaldelta(self, value):
+        self._revlog.delta_config.general_delta = value
+
 
 class bundlefilelog(filelog.filelog):
     def __init__(self, opener, path, cgunpacker, linkmapper):
--- a/mercurial/changelog.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/mercurial/changelog.py	Tue Oct 10 10:02:05 2023 +0200
@@ -416,7 +416,7 @@
             # changelogs don't benefit from generaldelta.
 
             self._format_flags &= ~revlog.FLAG_GENERALDELTA
-            self._generaldelta = False
+            self.delta_config.general_delta = False
 
         # Delta chains for changelogs tend to be very small because entries
         # tend to be small and don't delta well with each. So disable delta
--- a/mercurial/manifest.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/mercurial/manifest.py	Tue Oct 10 10:02:05 2023 +0200
@@ -1614,7 +1614,7 @@
         )
 
         self.index = self._revlog.index
-        self._generaldelta = self._revlog._generaldelta
+        self._generaldelta = self._revlog.delta_config.general_delta
 
     def get_revlog(self):
         """return an actual revlog instance if any
--- a/mercurial/revlog.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/mercurial/revlog.py	Tue Oct 10 10:02:05 2023 +0200
@@ -241,6 +241,92 @@
 hexdigits = b'0123456789abcdefABCDEF'
 
 
+@attr.s()
+class FeatureConfig:
+    """Hold configuration values about the available revlog features"""
+
+    # the default compression engine
+    compression_engine = attr.ib(default=b'zlib')
+    # compression engines options
+    compression_engine_options = attr.ib(default=attr.Factory(dict))
+
+    # can we use censor on this revlog
+    censorable = attr.ib(default=False)
+    # does this revlog use the "side data" feature
+    has_side_data = attr.ib(default=False)
+    # might remove rank configuration once the computation has no impact
+    compute_rank = attr.ib(default=False)
+    # parent order is supposed to be semantically irrelevant, so we
+    # normally resort parents to ensure that the first parent is non-null,
+    # if there is a non-null parent at all.
+    # filelog abuses the parent order as flag to mark some instances of
+    # meta-encoded files, so allow it to disable this behavior.
+    canonical_parent_order = attr.ib(default=False)
+    # can ellipsis commit be used
+    enable_ellipsis = attr.ib(default=False)
+
+
+@attr.s()
+class DataConfig:
+    """Hold configuration value about how the revlog data are read"""
+
+    # should we try to open the "pending" version of the revlog
+    try_pending = attr.ib(default=False)
+    # should we try to open the "split" version of the revlog
+    try_split = attr.ib(default=False)
+    #  When True, indexfile should be opened with checkambig=True at writing,
+    #  to avoid file stat ambiguity.
+    check_ambig = attr.ib(default=False)
+
+    # If true, use mmap instead of reading to deal with large index
+    mmap_large_index = attr.ib(default=False)
+    # how much data is large
+    mmap_index_threshold = attr.ib(default=None)
+    # How much data to read and cache into the raw revlog data cache.
+    chunk_cache_size = attr.ib(default=65536)
+
+    # Allow sparse reading of the revlog data
+    with_sparse_read = attr.ib(default=False)
+    # minimal density of a sparse read chunk
+    sr_density_threshold = attr.ib(default=0.50)
+    # minimal size of data we skip when performing sparse read
+    sr_min_gap_size = attr.ib(default=262144)
+
+    # are deltas encoded against arbitrary bases?
+    generaldelta = attr.ib(default=False)
+
+
+@attr.s()
+class DeltaConfig:
+    """Hold configuration value about how new delta are computed
+
+    Some attributes are duplicated from DataConfig to help keep each object
+    self-contained.
+    """
+
+    # can delta be encoded against arbitrary bases.
+    general_delta = attr.ib(default=False)
+    # Allow sparse writing of the revlog data
+    sparse_revlog = attr.ib(default=False)
+    # maximum length of a delta chain
+    max_chain_len = attr.ib(default=None)
+    # Maximum distance between delta chain base start and end
+    max_deltachain_span = attr.ib(default=-1)
+    # If `upper_bound_comp` is not None, this is the expected maximal gain from
+    # compression for the data content.
+    upper_bound_comp = attr.ib(default=None)
+    # Should we try a delta against both parents
+    delta_both_parents = attr.ib(default=True)
+    # Test delta base candidate group by chunk of this maximal size.
+    candidate_group_chunk_size = attr.ib(default=0)
+    # Should we display debug information about delta computation
+    debug_delta = attr.ib(default=False)
+    # trust incoming delta by default
+    lazy_delta = attr.ib(default=True)
+    # trust the base of incoming delta by default
+    lazy_delta_base = attr.ib(default=False)
+
+
 class revlog:
     """
     the underlying revision storage object
@@ -348,43 +434,31 @@
         assert target[0] in ALL_KINDS
         assert len(target) == 2
         self.target = target
-        #  When True, indexfile is opened with checkambig=True at writing, to
-        #  avoid file stat ambiguity.
-        self._checkambig = checkambig
-        self._mmaplargeindex = mmaplargeindex
-        self._censorable = censorable
+        self.feature_config = FeatureConfig(
+            censorable=censorable,
+            canonical_parent_order=canonical_parent_order,
+        )
+        self.data_config = DataConfig(
+            check_ambig=checkambig,
+            mmap_large_index=mmaplargeindex,
+        )
+        self.delta_config = DeltaConfig()
+
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, b'')
-        # How much data to read and cache into the raw revlog data cache.
-        self._chunkcachesize = 65536
-        self._maxchainlen = None
-        self._deltabothparents = True
-        self._candidate_group_chunk_size = 0
-        self._debug_delta = False
+
         self.index = None
         self._docket = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
-        # Mapping of revision integer to full node.
-        self._compengine = b'zlib'
-        self._compengineopts = {}
-        self._maxdeltachainspan = -1
-        self._withsparseread = False
-        self._sparserevlog = False
-        self.hassidedata = False
-        self._srdensitythreshold = 0.50
-        self._srmingapsize = 262144
 
         # other optionnals features
 
-        # might remove rank configuration once the computation has no impact
-        self._compute_rank = False
-
         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
@@ -398,12 +472,110 @@
 
         self._concurrencychecker = concurrencychecker
 
-        # parent order is supposed to be semantically irrelevant, so we
-        # normally resort parents to ensure that the first parent is non-null,
-        # if there is a non-null parent at all.
-        # filelog abuses the parent order as flag to mark some instances of
-        # meta-encoded files, so allow it to disable this behavior.
-        self.canonical_parent_order = canonical_parent_order
+    @property
+    def _generaldelta(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.general_delta
+
+    @property
+    def _checkambig(self):
+        """temporary compatibility proxy"""
+        return self.data_config.check_ambig
+
+    @property
+    def _mmaplargeindex(self):
+        """temporary compatibility proxy"""
+        return self.data_config.mmap_large_index
+
+    @property
+    def _censorable(self):
+        """temporary compatibility proxy"""
+        return self.feature_config.censorable
+
+    @property
+    def _chunkcachesize(self):
+        """temporary compatibility proxy"""
+        return self.data_config.chunk_cache_size
+
+    @property
+    def _maxchainlen(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.max_chain_len
+
+    @property
+    def _deltabothparents(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.delta_both_parents
+
+    @property
+    def _candidate_group_chunk_size(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.candidate_group_chunk_size
+
+    @property
+    def _debug_delta(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.debug_delta
+
+    @property
+    def _compengine(self):
+        """temporary compatibility proxy"""
+        return self.feature_config.compression_engine
+
+    @property
+    def _compengineopts(self):
+        """temporary compatibility proxy"""
+        return self.feature_config.compression_engine_options
+
+    @property
+    def _maxdeltachainspan(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.max_deltachain_span
+
+    @property
+    def _withsparseread(self):
+        """temporary compatibility proxy"""
+        return self.data_config.with_sparse_read
+
+    @property
+    def _sparserevlog(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.sparse_revlog
+
+    @property
+    def hassidedata(self):
+        """temporary compatibility proxy"""
+        return self.feature_config.has_side_data
+
+    @property
+    def _srdensitythreshold(self):
+        """temporary compatibility proxy"""
+        return self.data_config.sr_density_threshold
+
+    @property
+    def _srmingapsize(self):
+        """temporary compatibility proxy"""
+        return self.data_config.sr_min_gap_size
+
+    @property
+    def _compute_rank(self):
+        """temporary compatibility proxy"""
+        return self.feature_config.compute_rank
+
+    @property
+    def canonical_parent_order(self):
+        """temporary compatibility proxy"""
+        return self.feature_config.canonical_parent_order
+
+    @property
+    def _lazydelta(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.lazy_delta
+
+    @property
+    def _lazydeltabase(self):
+        """temporary compatibility proxy"""
+        return self.delta_config.lazy_delta_base
 
     def _init_opts(self):
         """process options (from above/config) to setup associated default revlog mode
@@ -426,7 +598,8 @@
 
         if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
             new_header = CHANGELOGV2
-            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
+            compute_rank = opts.get(b'changelogv2.compute-rank', True)
+            self.feature_config.compute_rank = compute_rank
         elif b'revlogv2' in opts:
             new_header = REVLOGV2
         elif b'revlogv1' in opts:
@@ -439,54 +612,63 @@
             new_header = REVLOG_DEFAULT_VERSION
 
         if b'chunkcachesize' in opts:
-            self._chunkcachesize = opts[b'chunkcachesize']
+            self.data_config.chunk_cache_size = opts[b'chunkcachesize']
         if b'maxchainlen' in opts:
-            self._maxchainlen = opts[b'maxchainlen']
+            self.delta_config.max_chain_len = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
-            self._deltabothparents = opts[b'deltabothparents']
+            self.delta_config.delta_both_parents = opts[b'deltabothparents']
         dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
         if dps_cgds:
-            self._candidate_group_chunk_size = dps_cgds
-        self._lazydelta = bool(opts.get(b'lazydelta', True))
-        self._lazydeltabase = False
-        if self._lazydelta:
-            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
+            self.delta_config.candidate_group_chunk_size = dps_cgds
+        if b'lazydelta' in opts:
+            self.delta_config.lazy_delta = bool(opts[b'lazydelta'])
+        if self._lazydelta and b'lazydeltabase' in opts:
+            self.delta_config.lazy_delta_base = opts[b'lazydeltabase']
         if b'debug-delta' in opts:
-            self._debug_delta = opts[b'debug-delta']
+            self.delta_config.debug_delta = opts[b'debug-delta']
         if b'compengine' in opts:
-            self._compengine = opts[b'compengine']
+            self.feature_config.compression_engine = opts[b'compengine']
+        comp_engine_opts = self.feature_config.compression_engine_options
         if b'zlib.level' in opts:
-            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
+            comp_engine_opts[b'zlib.level'] = opts[b'zlib.level']
         if b'zstd.level' in opts:
-            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
+            comp_engine_opts[b'zstd.level'] = opts[b'zstd.level']
         if b'maxdeltachainspan' in opts:
-            self._maxdeltachainspan = opts[b'maxdeltachainspan']
+            self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan']
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
-        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
-        withsparseread = bool(opts.get(b'with-sparse-read', False))
-        # sparse-revlog forces sparse-read
-        self._withsparseread = self._sparserevlog or withsparseread
+            self.data_config.mmap_index_threshold = mmapindexthreshold
+        if b'sparse-revlog' in opts:
+            self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog'])
+        if self.delta_config.sparse_revlog:
+            # sparse-revlog forces sparse-read
+            self.data_config.with_sparse_read = True
+        elif b'with-sparse-read' in opts:
+            self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
         if b'sparse-read-density-threshold' in opts:
-            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
+            self.data_config.sr_density_threshold = opts[
+                b'sparse-read-density-threshold'
+            ]
         if b'sparse-read-min-gap-size' in opts:
-            self._srmingapsize = opts[b'sparse-read-min-gap-size']
+            self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
         if opts.get(b'enableellipsis'):
+            self.feature_config.enable_ellipsis = True
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
 
         # revlog v0 doesn't have flag processors
         for flag, processor in opts.get(b'flagprocessors', {}).items():
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
 
-        if self._chunkcachesize <= 0:
+        chunk_cache_size = self.data_config.chunk_cache_size
+        if chunk_cache_size <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
-                % self._chunkcachesize
+                % chunk_cache_size
             )
-        elif self._chunkcachesize & (self._chunkcachesize - 1):
+        elif chunk_cache_size & (chunk_cache_size - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
-                % self._chunkcachesize
+                % chunk_cache_size
             )
         force_nodemap = opts.get(b'devel-force-nodemap', False)
         return new_header, mmapindexthreshold, force_nodemap
@@ -664,8 +846,10 @@
 
             features = FEATURES_BY_VERSION[self._format_version]
             self._inline = features[b'inline'](self._format_flags)
-            self._generaldelta = features[b'generaldelta'](self._format_flags)
-            self.hassidedata = features[b'sidedata']
+            self.delta_config.general_delta = features[b'generaldelta'](
+                self._format_flags
+            )
+            self.feature_config.has_side_data = features[b'sidedata']
 
             if not features[b'docket']:
                 self._indexfile = entry_point
@@ -694,7 +878,7 @@
 
             self._inline = False
             # generaldelta implied by version 2 revlogs.
-            self._generaldelta = True
+            self.delta_config.general_delta = True
             # the logic for persistent nodemap will be dealt with within the
             # main docket, so disable it for now.
             self._nodemap_file = None
@@ -712,7 +896,7 @@
 
         # sparse-revlog can't be on without general-delta (issue6056)
         if not self._generaldelta:
-            self._sparserevlog = False
+            self.delta_config.sparse_revlog = False
 
         self._storedeltachains = True
 
@@ -3197,16 +3381,17 @@
 
         try:
             if deltareuse == self.DELTAREUSEALWAYS:
-                destrevlog._lazydeltabase = True
-                destrevlog._lazydelta = True
+                destrevlog.delta_config.lazy_delta_base = True
+                destrevlog.delta_config.lazy_delta = True
             elif deltareuse == self.DELTAREUSESAMEREVS:
-                destrevlog._lazydeltabase = False
-                destrevlog._lazydelta = True
+                destrevlog.delta_config.lazy_delta_base = False
+                destrevlog.delta_config.lazy_delta = True
             elif deltareuse == self.DELTAREUSENEVER:
-                destrevlog._lazydeltabase = False
-                destrevlog._lazydelta = False
-
-            destrevlog._deltabothparents = forcedeltabothparents or oldamd
+                destrevlog.delta_config.lazy_delta_base = False
+                destrevlog.delta_config.lazy_delta = False
+
+            delta_both_parents = forcedeltabothparents or oldamd
+            destrevlog.delta_config.delta_both_parents = delta_both_parents
 
             with self.reading():
                 self._clone(
@@ -3219,9 +3404,9 @@
                 )
 
         finally:
-            destrevlog._lazydelta = oldlazydelta
-            destrevlog._lazydeltabase = oldlazydeltabase
-            destrevlog._deltabothparents = oldamd
+            destrevlog.delta_config.lazy_delta = oldlazydelta
+            destrevlog.delta_config.lazy_delta_base = oldlazydeltabase
+            destrevlog.delta_config.delta_both_parents = oldamd
 
     def _clone(
         self,
--- a/mercurial/revlogutils/rewrite.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/mercurial/revlogutils/rewrite.py	Tue Oct 10 10:02:05 2023 +0200
@@ -75,7 +75,7 @@
     )
     newrl._format_version = rl._format_version
     newrl._format_flags = rl._format_flags
-    newrl._generaldelta = rl._generaldelta
+    newrl.delta_config.general_delta = rl._generaldelta
     newrl._parse_index = rl._parse_index
 
     for rev in rl.revs():
--- a/mercurial/unionrepo.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/mercurial/unionrepo.py	Tue Oct 10 10:02:05 2023 +0200
@@ -213,6 +213,15 @@
             self, opener, self._revlog.radix, manifest2, linkmapper
         )
 
+    # XXX small hack to work around the use of manifest.manifestrevlog
+    @property
+    def _generaldelta(self):
+        return self._revlog.delta_config.general_delta
+
+    @_generaldelta.setter
+    def _generaldelta(self, value):
+        self._revlog.delta_config.general_delta = value
+
 
 class unionfilelog(filelog.filelog):
     def __init__(self, opener, path, opener2, linkmapper, repo):
--- a/tests/test-revlog-raw.py	Tue Oct 10 10:01:57 2023 +0200
+++ b/tests/test-revlog-raw.py	Tue Oct 10 10:02:05 2023 +0200
@@ -375,7 +375,8 @@
     try:
         # the test revlog is small, we remove the floor under which we
         # slicing is diregarded.
-        rlog._srmingapsize = 0
+        rlog.data_config.sr_min_gap_size = 0
+        rlog.delta_config.sr_min_gap_size = 0
         for item in slicingdata:
             chain, expected, target = item
             result = deltas.slicechunk(rlog, chain, targetsize=target)
@@ -387,7 +388,8 @@
                 print('  expected: %s' % expected)
                 print('  result:   %s' % result)
     finally:
-        rlog._srmingapsize = oldmin
+        rlog.data_config.sr_min_gap_size = oldmin
+        rlog.delta_config.sr_min_gap_size = oldmin
 
 
 def md5sum(s):