comparison mercurial/store.py @ 50513:5217e36356bb

store: add logic to group revlog files together. For now each file gets its own entry; this will help put a stop to that, soon™. We use such gathering in the `basicstore` code.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 15 May 2023 08:58:33 +0200
parents 1c0244a8cdaf
children d4f54aded22e
comparison
equal deleted inserted replaced
50512:1c0244a8cdaf 50513:5217e36356bb
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com> 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 # 4 #
5 # This software may be used and distributed according to the terms of the 5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version. 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 import functools 9 import functools
10 import os 10 import os
11 import re 11 import re
12 import stat 12 import stat
13 from typing import Generator 13 from typing import Generator
393 b'.dat', 393 b'.dat',
394 b'.n', 394 b'.n',
395 b'.nd', 395 b'.nd',
396 b'.sda', 396 b'.sda',
397 ) 397 )
398 # file extensions that also use a `-SOMELONGIDHASH.ext` form
399 REVLOG_FILES_LONG_EXT = (
400 b'.nd',
401 b'.idx',
402 b'.dat',
403 b'.sda',
404 )
398 # files that are "volatile" and might change between listing and streaming 405 # files that are "volatile" and might change between listing and streaming
399 # 406 #
400 # note: the ".nd" files are nodemap data and won't "change" but they might be 407 # note: the ".nd" files are nodemap data and won't "change" but they might be
401 # deleted. 408 # deleted.
402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd') 409 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
528 return self._file_size 535 return self._file_size
529 try: 536 try:
530 return vfs.stat(self.unencoded_path).st_size 537 return vfs.stat(self.unencoded_path).st_size
531 except FileNotFoundError: 538 except FileNotFoundError:
532 return 0 539 return 0
540
541
def _gather_revlog(files_data):
    """group files per revlog prefix

    `files_data` is an iterable of `(path, data)` pairs. Each path is split
    with `_split_revlog_ext` into a revlog prefix and a file "suffix".

    Returns a list of `(prefix, {suffix: data})` pairs sorted by prefix. When
    the same prefix and suffix are seen more than once, the last data wins.
    """
    grouped = {}
    for path, data in files_data:
        prefix, suffix = _split_revlog_ext(path)
        # create the per-revlog mapping the first time a prefix shows up
        grouped.setdefault(prefix, {})[suffix] = data
    entries = list(grouped.items())
    entries.sort()
    return entries
554
555
556 def _split_revlog_ext(filename):
557 """split the revlog file prefix from the variable extension"""
558 if filename.endswith(REVLOG_FILES_LONG_EXT):
559 char = b'-'
560 else:
561 char = b'.'
562 idx = filename.rfind(char)
563 return filename[:idx], filename[idx:]
533 564
534 565
535 class basicstore: 566 class basicstore:
536 '''base class for local repository stores''' 567 '''base class for local repository stores'''
537 568
590 When [undecodable] is None, revlogs names that can't be 621 When [undecodable] is None, revlogs names that can't be
591 decoded cause an exception. When it is provided, it should 622 decoded cause an exception. When it is provided, it should
592 be a list and the filenames that can't be decoded are added 623 be a list and the filenames that can't be decoded are added
593 to it instead. This is very rarely needed.""" 624 to it instead. This is very rarely needed."""
594 files = self._walk(b'data', True) + self._walk(b'meta', True) 625 files = self._walk(b'data', True) + self._walk(b'meta', True)
595 for u, (t, s) in files: 626 files = (f for f in files if f[1][0] is not None)
596 if t is not None: 627 for revlog, details in _gather_revlog(files):
628 for ext, (t, s) in sorted(details.items()):
629 u = revlog + ext
597 yield RevlogStoreEntry( 630 yield RevlogStoreEntry(
598 unencoded_path=u, 631 unencoded_path=u,
599 revlog_type=FILEFLAGS_FILELOG, 632 revlog_type=FILEFLAGS_FILELOG,
600 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN), 633 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
601 is_volatile=bool(t & FILEFLAGS_VOLATILE), 634 is_volatile=bool(t & FILEFLAGS_VOLATILE),