comparison mercurial/revlog.py @ 51029:498afb627f78

revlog: move configuration attributes into dedicated objects. First, this makes things clearer: the number of configuration attributes has grown out of control, so gathering them into a few objects makes them easier to follow. Second, this will make sharing them at different levels simpler in the future. Third, it will soon allow us to simplify initialisation and temporary overrides.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 10 Oct 2023 10:02:05 +0200
parents 8520db304f01
children 177e7d6bf875
comparison
equal deleted inserted replaced
51028:f70ce1aedbcb 51029:498afb627f78
239 ) 239 )
240 240
241 hexdigits = b'0123456789abcdefABCDEF' 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 @attr.s()
245 class FeatureConfig:
246 """Hold configuration values about the available revlog features"""
247
248 # the default compression engine
249 compression_engine = attr.ib(default=b'zlib')
250 # compression engines options
251 compression_engine_options = attr.ib(default=attr.Factory(dict))
252
253 # can we use censor on this revlog
254 censorable = attr.ib(default=False)
255 # does this revlog use the "side data" feature
256 has_side_data = attr.ib(default=False)
257 # might remove rank configuration once the computation has no impact
258 compute_rank = attr.ib(default=False)
259 # parent order is supposed to be semantically irrelevant, so we
260 # normally resort parents to ensure that the first parent is non-null,
261 # if there is a non-null parent at all.
262 # filelog abuses the parent order as flag to mark some instances of
263 # meta-encoded files, so allow it to disable this behavior.
264 canonical_parent_order = attr.ib(default=False)
265 # can ellipsis commit be used
266 enable_ellipsis = attr.ib(default=False)
267
268
269 @attr.s()
270 class DataConfig:
271 """Hold configuration value about how the revlog data are read"""
272
273 # should we try to open the "pending" version of the revlog
274 try_pending = attr.ib(default=False)
275 # should we try to open the "split" version of the revlog
276 try_split = attr.ib(default=False)
277 # When True, indexfile should be opened with checkambig=True at writing,
278 # to avoid file stat ambiguity.
279 check_ambig = attr.ib(default=False)
280
281 # If true, use mmap instead of reading to deal with large index
282 mmap_large_index = attr.ib(default=False)
283 # how much data is large
284 mmap_index_threshold = attr.ib(default=None)
285 # How much data to read and cache into the raw revlog data cache.
286 chunk_cache_size = attr.ib(default=65536)
287
288 # Allow sparse reading of the revlog data
289 with_sparse_read = attr.ib(default=False)
290 # minimal density of a sparse read chunk
291 sr_density_threshold = attr.ib(default=0.50)
292 # minimal size of data we skip when performing sparse read
293 sr_min_gap_size = attr.ib(default=262144)
294
295 # are deltas encoded against arbitrary bases.
296 generaldelta = attr.ib(default=False)
297
298
299 @attr.s()
300 class DeltaConfig:
301 """Hold configuration value about how new delta are computed
302
303 Some attributes are duplicated from DataConfig to help keep each object
304 self-contained.
305 """
306
307 # can delta be encoded against arbitrary bases.
308 general_delta = attr.ib(default=False)
309 # Allow sparse writing of the revlog data
310 sparse_revlog = attr.ib(default=False)
311 # maximum length of a delta chain
312 max_chain_len = attr.ib(default=None)
313 # Maximum distance between delta chain base start and end
314 max_deltachain_span = attr.ib(default=-1)
315 # If `upper_bound_comp` is not None, this is the expected maximal gain from
316 # compression for the data content.
317 upper_bound_comp = attr.ib(default=None)
318 # Should we try a delta against both parents
319 delta_both_parents = attr.ib(default=True)
320 # Test delta base candidate group by chunk of this maximal size.
321 candidate_group_chunk_size = attr.ib(default=0)
322 # Should we display debug information about delta computation
323 debug_delta = attr.ib(default=False)
324 # trust incoming delta by default
325 lazy_delta = attr.ib(default=True)
326 # trust the base of incoming delta by default
327 lazy_delta_base = attr.ib(default=False)
328
329
244 class revlog: 330 class revlog:
245 """ 331 """
246 the underlying revision storage object 332 the underlying revision storage object
247 333
248 A revlog consists of two parts, an index and the revision data. 334 A revlog consists of two parts, an index and the revision data.
346 self._nodemap_file = nodemaputil.get_nodemap_file(self) 432 self._nodemap_file = nodemaputil.get_nodemap_file(self)
347 433
348 assert target[0] in ALL_KINDS 434 assert target[0] in ALL_KINDS
349 assert len(target) == 2 435 assert len(target) == 2
350 self.target = target 436 self.target = target
351 # When True, indexfile is opened with checkambig=True at writing, to 437 self.feature_config = FeatureConfig(
352 # avoid file stat ambiguity. 438 censorable=censorable,
353 self._checkambig = checkambig 439 canonical_parent_order=canonical_parent_order,
354 self._mmaplargeindex = mmaplargeindex 440 )
355 self._censorable = censorable 441 self.data_config = DataConfig(
442 check_ambig=checkambig,
443 mmap_large_index=mmaplargeindex,
444 )
445 self.delta_config = DeltaConfig()
446
356 # 3-tuple of (node, rev, text) for a raw revision. 447 # 3-tuple of (node, rev, text) for a raw revision.
357 self._revisioncache = None 448 self._revisioncache = None
358 # Maps rev to chain base rev. 449 # Maps rev to chain base rev.
359 self._chainbasecache = util.lrucachedict(100) 450 self._chainbasecache = util.lrucachedict(100)
360 # 2-tuple of (offset, data) of raw data from the revlog at an offset. 451 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 self._chunkcache = (0, b'') 452 self._chunkcache = (0, b'')
362 # How much data to read and cache into the raw revlog data cache. 453
363 self._chunkcachesize = 65536
364 self._maxchainlen = None
365 self._deltabothparents = True
366 self._candidate_group_chunk_size = 0
367 self._debug_delta = False
368 self.index = None 454 self.index = None
369 self._docket = None 455 self._docket = None
370 self._nodemap_docket = None 456 self._nodemap_docket = None
371 # Mapping of partial identifiers to full nodes. 457 # Mapping of partial identifiers to full nodes.
372 self._pcache = {} 458 self._pcache = {}
373 # Mapping of revision integer to full node.
374 self._compengine = b'zlib'
375 self._compengineopts = {}
376 self._maxdeltachainspan = -1
377 self._withsparseread = False
378 self._sparserevlog = False
379 self.hassidedata = False
380 self._srdensitythreshold = 0.50
381 self._srmingapsize = 262144
382 459
383 # other optional features 460 # other optional features
384
385 # might remove rank configuration once the computation has no impact
386 self._compute_rank = False
387 461
388 # Make copy of flag processors so each revlog instance can support 462 # Make copy of flag processors so each revlog instance can support
389 # custom flags. 463 # custom flags.
390 self._flagprocessors = dict(flagutil.flagprocessors) 464 self._flagprocessors = dict(flagutil.flagprocessors)
391 465
396 470
397 self._loadindex() 471 self._loadindex()
398 472
399 self._concurrencychecker = concurrencychecker 473 self._concurrencychecker = concurrencychecker
400 474
401 # parent order is supposed to be semantically irrelevant, so we 475 @property
402 # normally resort parents to ensure that the first parent is non-null, 476 def _generaldelta(self):
403 # if there is a non-null parent at all. 477 """temporary compatibility proxy"""
404 # filelog abuses the parent order as flag to mark some instances of 478 return self.delta_config.general_delta
405 # meta-encoded files, so allow it to disable this behavior. 479
406 self.canonical_parent_order = canonical_parent_order 480 @property
481 def _checkambig(self):
482 """temporary compatibility proxy"""
483 return self.data_config.check_ambig
484
485 @property
486 def _mmaplargeindex(self):
487 """temporary compatibility proxy"""
488 return self.data_config.mmap_large_index
489
490 @property
491 def _censorable(self):
492 """temporary compatibility proxy"""
493 return self.feature_config.censorable
494
495 @property
496 def _chunkcachesize(self):
497 """temporary compatibility proxy"""
498 return self.data_config.chunk_cache_size
499
500 @property
501 def _maxchainlen(self):
502 """temporary compatibility proxy"""
503 return self.delta_config.max_chain_len
504
505 @property
506 def _deltabothparents(self):
507 """temporary compatibility proxy"""
508 return self.delta_config.delta_both_parents
509
510 @property
511 def _candidate_group_chunk_size(self):
512 """temporary compatibility proxy"""
513 return self.delta_config.candidate_group_chunk_size
514
515 @property
516 def _debug_delta(self):
517 """temporary compatibility proxy"""
518 return self.delta_config.debug_delta
519
520 @property
521 def _compengine(self):
522 """temporary compatibility proxy"""
523 return self.feature_config.compression_engine
524
525 @property
526 def _compengineopts(self):
527 """temporary compatibility proxy"""
528 return self.feature_config.compression_engine_options
529
530 @property
531 def _maxdeltachainspan(self):
532 """temporary compatibility proxy"""
533 return self.delta_config.max_deltachain_span
534
535 @property
536 def _withsparseread(self):
537 """temporary compatibility proxy"""
538 return self.data_config.with_sparse_read
539
540 @property
541 def _sparserevlog(self):
542 """temporary compatibility proxy"""
543 return self.delta_config.sparse_revlog
544
545 @property
546 def hassidedata(self):
547 """temporary compatibility proxy"""
548 return self.feature_config.has_side_data
549
550 @property
551 def _srdensitythreshold(self):
552 """temporary compatibility proxy"""
553 return self.data_config.sr_density_threshold
554
555 @property
556 def _srmingapsize(self):
557 """temporary compatibility proxy"""
558 return self.data_config.sr_min_gap_size
559
560 @property
561 def _compute_rank(self):
562 """temporary compatibility proxy"""
563 return self.feature_config.compute_rank
564
565 @property
566 def canonical_parent_order(self):
567 """temporary compatibility proxy"""
568 return self.feature_config.canonical_parent_order
569
570 @property
571 def _lazydelta(self):
572 """temporary compatibility proxy"""
573 return self.delta_config.lazy_delta
574
575 @property
576 def _lazydeltabase(self):
577 """temporary compatibility proxy"""
578 return self.delta_config.lazy_delta_base
407 579
408 def _init_opts(self): 580 def _init_opts(self):
409 """process options (from above/config) to setup associated default revlog mode 581 """process options (from above/config) to setup associated default revlog mode
410 582
411 These values might be affected when actually reading on disk information. 583 These values might be affected when actually reading on disk information.
424 mmapindexthreshold = None 596 mmapindexthreshold = None
425 opts = self.opener.options 597 opts = self.opener.options
426 598
427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG: 599 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
428 new_header = CHANGELOGV2 600 new_header = CHANGELOGV2
429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True) 601 compute_rank = opts.get(b'changelogv2.compute-rank', True)
602 self.feature_config.compute_rank = compute_rank
430 elif b'revlogv2' in opts: 603 elif b'revlogv2' in opts:
431 new_header = REVLOGV2 604 new_header = REVLOGV2
432 elif b'revlogv1' in opts: 605 elif b'revlogv1' in opts:
433 new_header = REVLOGV1 | FLAG_INLINE_DATA 606 new_header = REVLOGV1 | FLAG_INLINE_DATA
434 if b'generaldelta' in opts: 607 if b'generaldelta' in opts:
437 new_header = REVLOGV0 610 new_header = REVLOGV0
438 else: 611 else:
439 new_header = REVLOG_DEFAULT_VERSION 612 new_header = REVLOG_DEFAULT_VERSION
440 613
441 if b'chunkcachesize' in opts: 614 if b'chunkcachesize' in opts:
442 self._chunkcachesize = opts[b'chunkcachesize'] 615 self.data_config.chunk_cache_size = opts[b'chunkcachesize']
443 if b'maxchainlen' in opts: 616 if b'maxchainlen' in opts:
444 self._maxchainlen = opts[b'maxchainlen'] 617 self.delta_config.max_chain_len = opts[b'maxchainlen']
445 if b'deltabothparents' in opts: 618 if b'deltabothparents' in opts:
446 self._deltabothparents = opts[b'deltabothparents'] 619 self.delta_config.delta_both_parents = opts[b'deltabothparents']
447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size') 620 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
448 if dps_cgds: 621 if dps_cgds:
449 self._candidate_group_chunk_size = dps_cgds 622 self.delta_config.candidate_group_chunk_size = dps_cgds
450 self._lazydelta = bool(opts.get(b'lazydelta', True)) 623 if b'lazydelta' in opts:
451 self._lazydeltabase = False 624 self.delta_config.lazy_delta = bool(opts[b'lazydelta'])
452 if self._lazydelta: 625 if self._lazydelta and b'lazydeltabase' in opts:
453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False)) 626 self.delta_config.lazy_delta_base = opts[b'lazydeltabase']
454 if b'debug-delta' in opts: 627 if b'debug-delta' in opts:
455 self._debug_delta = opts[b'debug-delta'] 628 self.delta_config.debug_delta = opts[b'debug-delta']
456 if b'compengine' in opts: 629 if b'compengine' in opts:
457 self._compengine = opts[b'compengine'] 630 self.feature_config.compression_engine = opts[b'compengine']
631 comp_engine_opts = self.feature_config.compression_engine_options
458 if b'zlib.level' in opts: 632 if b'zlib.level' in opts:
459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level'] 633 comp_engine_opts[b'zlib.level'] = opts[b'zlib.level']
460 if b'zstd.level' in opts: 634 if b'zstd.level' in opts:
461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level'] 635 comp_engine_opts[b'zstd.level'] = opts[b'zstd.level']
462 if b'maxdeltachainspan' in opts: 636 if b'maxdeltachainspan' in opts:
463 self._maxdeltachainspan = opts[b'maxdeltachainspan'] 637 self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan']
464 if self._mmaplargeindex and b'mmapindexthreshold' in opts: 638 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
465 mmapindexthreshold = opts[b'mmapindexthreshold'] 639 mmapindexthreshold = opts[b'mmapindexthreshold']
466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) 640 self.data_config.mmap_index_threshold = mmapindexthreshold
467 withsparseread = bool(opts.get(b'with-sparse-read', False)) 641 if b'sparse-revlog' in opts:
468 # sparse-revlog forces sparse-read 642 self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog'])
469 self._withsparseread = self._sparserevlog or withsparseread 643 if self.delta_config.sparse_revlog:
644 # sparse-revlog forces sparse-read
645 self.data_config.with_sparse_read = True
646 elif b'with-sparse-read' in opts:
647 self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
470 if b'sparse-read-density-threshold' in opts: 648 if b'sparse-read-density-threshold' in opts:
471 self._srdensitythreshold = opts[b'sparse-read-density-threshold'] 649 self.data_config.sr_density_threshold = opts[
650 b'sparse-read-density-threshold'
651 ]
472 if b'sparse-read-min-gap-size' in opts: 652 if b'sparse-read-min-gap-size' in opts:
473 self._srmingapsize = opts[b'sparse-read-min-gap-size'] 653 self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
474 if opts.get(b'enableellipsis'): 654 if opts.get(b'enableellipsis'):
655 self.feature_config.enable_ellipsis = True
475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor 656 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
476 657
477 # revlog v0 doesn't have flag processors 658 # revlog v0 doesn't have flag processors
478 for flag, processor in opts.get(b'flagprocessors', {}).items(): 659 for flag, processor in opts.get(b'flagprocessors', {}).items():
479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors) 660 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
480 661
481 if self._chunkcachesize <= 0: 662 chunk_cache_size = self.data_config.chunk_cache_size
663 if chunk_cache_size <= 0:
482 raise error.RevlogError( 664 raise error.RevlogError(
483 _(b'revlog chunk cache size %r is not greater than 0') 665 _(b'revlog chunk cache size %r is not greater than 0')
484 % self._chunkcachesize 666 % chunk_cache_size
485 ) 667 )
486 elif self._chunkcachesize & (self._chunkcachesize - 1): 668 elif chunk_cache_size & (chunk_cache_size - 1):
487 raise error.RevlogError( 669 raise error.RevlogError(
488 _(b'revlog chunk cache size %r is not a power of 2') 670 _(b'revlog chunk cache size %r is not a power of 2')
489 % self._chunkcachesize 671 % chunk_cache_size
490 ) 672 )
491 force_nodemap = opts.get(b'devel-force-nodemap', False) 673 force_nodemap = opts.get(b'devel-force-nodemap', False)
492 return new_header, mmapindexthreshold, force_nodemap 674 return new_header, mmapindexthreshold, force_nodemap
493 675
494 def _get_data(self, filepath, mmap_threshold, size=None): 676 def _get_data(self, filepath, mmap_threshold, size=None):
662 msg %= (display_flag, self._format_version, self.display_id) 844 msg %= (display_flag, self._format_version, self.display_id)
663 raise error.RevlogError(msg) 845 raise error.RevlogError(msg)
664 846
665 features = FEATURES_BY_VERSION[self._format_version] 847 features = FEATURES_BY_VERSION[self._format_version]
666 self._inline = features[b'inline'](self._format_flags) 848 self._inline = features[b'inline'](self._format_flags)
667 self._generaldelta = features[b'generaldelta'](self._format_flags) 849 self.delta_config.general_delta = features[b'generaldelta'](
668 self.hassidedata = features[b'sidedata'] 850 self._format_flags
851 )
852 self.feature_config.has_side_data = features[b'sidedata']
669 853
670 if not features[b'docket']: 854 if not features[b'docket']:
671 self._indexfile = entry_point 855 self._indexfile = entry_point
672 index_data = entry_data 856 index_data = entry_data
673 else: 857 else:
692 msg %= (self.display_id, len(index_data), index_size) 876 msg %= (self.display_id, len(index_data), index_size)
693 raise error.RevlogError(msg) 877 raise error.RevlogError(msg)
694 878
695 self._inline = False 879 self._inline = False
696 # generaldelta implied by version 2 revlogs. 880 # generaldelta implied by version 2 revlogs.
697 self._generaldelta = True 881 self.delta_config.general_delta = True
698 # the logic for persistent nodemap will be dealt with within the 882 # the logic for persistent nodemap will be dealt with within the
699 # main docket, so disable it for now. 883 # main docket, so disable it for now.
700 self._nodemap_file = None 884 self._nodemap_file = None
701 885
702 if self._docket is not None: 886 if self._docket is not None:
710 self.nodeconstants = sha1nodeconstants 894 self.nodeconstants = sha1nodeconstants
711 self.nullid = self.nodeconstants.nullid 895 self.nullid = self.nodeconstants.nullid
712 896
713 # sparse-revlog can't be on without general-delta (issue6056) 897 # sparse-revlog can't be on without general-delta (issue6056)
714 if not self._generaldelta: 898 if not self._generaldelta:
715 self._sparserevlog = False 899 self.delta_config.sparse_revlog = False
716 900
717 self._storedeltachains = True 901 self._storedeltachains = True
718 902
719 devel_nodemap = ( 903 devel_nodemap = (
720 self._nodemap_file 904 self._nodemap_file
3195 oldlazydeltabase = destrevlog._lazydeltabase 3379 oldlazydeltabase = destrevlog._lazydeltabase
3196 oldamd = destrevlog._deltabothparents 3380 oldamd = destrevlog._deltabothparents
3197 3381
3198 try: 3382 try:
3199 if deltareuse == self.DELTAREUSEALWAYS: 3383 if deltareuse == self.DELTAREUSEALWAYS:
3200 destrevlog._lazydeltabase = True 3384 destrevlog.delta_config.lazy_delta_base = True
3201 destrevlog._lazydelta = True 3385 destrevlog.delta_config.lazy_delta = True
3202 elif deltareuse == self.DELTAREUSESAMEREVS: 3386 elif deltareuse == self.DELTAREUSESAMEREVS:
3203 destrevlog._lazydeltabase = False 3387 destrevlog.delta_config.lazy_delta_base = False
3204 destrevlog._lazydelta = True 3388 destrevlog.delta_config.lazy_delta = True
3205 elif deltareuse == self.DELTAREUSENEVER: 3389 elif deltareuse == self.DELTAREUSENEVER:
3206 destrevlog._lazydeltabase = False 3390 destrevlog.delta_config.lazy_delta_base = False
3207 destrevlog._lazydelta = False 3391 destrevlog.delta_config.lazy_delta = False
3208 3392
3209 destrevlog._deltabothparents = forcedeltabothparents or oldamd 3393 delta_both_parents = forcedeltabothparents or oldamd
3394 destrevlog.delta_config.delta_both_parents = delta_both_parents
3210 3395
3211 with self.reading(): 3396 with self.reading():
3212 self._clone( 3397 self._clone(
3213 tr, 3398 tr,
3214 destrevlog, 3399 destrevlog,
3217 forcedeltabothparents, 3402 forcedeltabothparents,
3218 sidedata_helpers, 3403 sidedata_helpers,
3219 ) 3404 )
3220 3405
3221 finally: 3406 finally:
3222 destrevlog._lazydelta = oldlazydelta 3407 destrevlog.delta_config.lazy_delta = oldlazydelta
3223 destrevlog._lazydeltabase = oldlazydeltabase 3408 destrevlog.delta_config.lazy_delta_base = oldlazydeltabase
3224 destrevlog._deltabothparents = oldamd 3409 destrevlog.delta_config.delta_both_parents = oldamd
3225 3410
3226 def _clone( 3411 def _clone(
3227 self, 3412 self,
3228 tr, 3413 tr,
3229 destrevlog, 3414 destrevlog,