Mercurial > hg-stable
changeset 50679:e06d1a779eb6
store: stop relying on a `revlog_type` property
We want to know if a file is related to a revlog, but the rest is dealt with
differently already, so we simplify things further.
as a bonus, this cleanup This provides a small but noticeable speedup.
The number below use `hg perf::stream-locked-section` to measure the time spend
in the locked section of the streaming clone. Number are run on various
repository and compare different steps.:
1) the effect of this patchs,
2) the effect of the cleanup series,
2) current state compared to because large refactoring.
All benchmarks are run on linux with Python 3.10.7.
### Effect of this patch
# mercurial-2018-08-01-zstd-sparse-revlog
# benchmark.name = perf-stream-locked-section
before: 0.030246 seconds
after: 0.029274 seconds (-3.21%)
# pypy-2018-08-01-zstd-sparse-revlog
before: 0.545012 seconds
after: 0.520872 seconds (-4.43%)
# netbeans-2018-08-01-zstd-sparse-revlog
before: 2.719939 seconds
after: 2.626791 seconds (-3.42%)
# mozilla-central-2018-08-01-zstd-sparse-revlog
before: 6.304179 seconds
after: 6.096700 seconds (-3.29%)
# mozilla-try-2019-02-18-zstd-sparse-revlog
before: 14.142687 seconds
after: 13.640779 seconds (-3.55%)
### Effect of this series
A small but sizeable speedup
# mercurial-2018-08-01-zstd-sparse-revlog
before: 0.031122 seconds
after: 0.029274 seconds (-5.94%)
# pypy-2018-08-01-zstd-sparse-revlog
before: 0.589970 seconds
after: 0.520872 seconds (-11.71%)
# netbeans-2018-08-01-zstd-sparse-revlog
before: 2.980300 seconds
after: 2.626791 seconds (-11.86%)
# mozilla-central-2018-08-01-zstd-sparse-revlog
before: 6.863204 seconds
after: 6.096700 seconds (-11.17%)
# mozilla-try-2019-02-18-zstd-sparse-revlog
before: 14.921393 seconds
after: 13.640779 seconds (-8.58%)
### Current state compared to the pre-refactoring state
The refactoring introduced multiple string manipulation and dictionary creation
that seems to induce a signifiant slowdown
Slowdown
# mercurial-2018-08-01-zstd-sparse-revlog
6.4.3: 0.019459 seconds
after: 0.029274 seconds (+50.44%)
## pypy-2018-08-01-zstd-sparse-revlog
6.4.3: 0.290715 seconds
after: 0.520872 seconds (+79.17%)
# netbeans-2018-08-01-zstd-sparse-revlog
6.4.3: 1.403447 seconds
after: 2.626791 seconds (+87.17%)
# mozilla-central-2018-08-01-zstd-sparse-revlog
6.4.3: 3.163549 seconds
after: 6.096700 seconds (+92.72%)
# mozilla-try-2019-02-18-zstd-sparse-revlog
6.4.3: 6.702184 seconds
after: 13.640779 seconds (+103.53%)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 30 May 2023 17:43:59 +0100 |
parents | 11562d72cb7b |
children | 3b56395404a1 |
files | mercurial/store.py mercurial/upgrade_utils/engine.py |
diffstat | 2 files changed, 22 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/store.py Tue May 30 16:38:13 2023 +0100 +++ b/mercurial/store.py Tue May 30 17:43:59 2023 +0100 @@ -385,8 +385,8 @@ b'requires', ] -REVLOG_FILES_MAIN_EXT = (b'.i',) -REVLOG_FILES_OTHER_EXT = ( +REVLOG_FILES_EXT = ( + b'.i', b'.idx', b'.d', b'.dat', @@ -415,22 +415,16 @@ def is_revlog(f, kind, st): if kind != stat.S_IFREG: - return None - return revlog_type(f) + return False + if f.endswith(REVLOG_FILES_EXT): + return True + return False -def revlog_type(f): - # XXX we need to filter `undo.` created by the transaction here, however - # being naive about it also filter revlog for `undo.*` files, leading to - # issue6542. So we no longer use EXCLUDED. - if f.endswith(REVLOG_FILES_MAIN_EXT): - return FILEFLAGS_REVLOG_MAIN - elif f.endswith(REVLOG_FILES_OTHER_EXT): - t = FILETYPE_FILELOG_OTHER - if f.endswith(REVLOG_FILES_VOLATILE_EXT): - t |= FILEFLAGS_VOLATILE - return t - return None +def is_revlog_file(f): + if f.endswith(REVLOG_FILES_EXT): + return True + return False # the file is part of changelog data @@ -758,10 +752,9 @@ p = visit.pop() for f, kind, st in readdir(p, stat=True): fp = p + b'/' + f - rl_type = is_revlog(f, kind, st) - if rl_type is not None: + if is_revlog(f, kind, st): n = util.pconvert(fp[striplen:]) - l.append((decodedir(n), (rl_type, st.st_size))) + l.append((decodedir(n), st.st_size)) elif kind == stat.S_IFDIR and recurse: visit.append(fp) @@ -794,20 +787,16 @@ ] for base_dir, rl_type, strip_filename in dirs: files = self._walk(base_dir, True, undecodable=undecodable) - files = (f for f in files if f[1][0] is not None) for revlog, details in _gather_revlog(files): - file_details = {} revlog_target_id = revlog.split(b'/', 1)[1] if strip_filename and b'/' in revlog: revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0] revlog_target_id += b'/' - for ext, (t, size) in sorted(details.items()): - file_details[ext] = size yield RevlogStoreEntry( path_prefix=revlog, revlog_type=rl_type, target_id=revlog_target_id, - details=file_details, + details=details, ) def top_entries( @@ -831,17 +820,17 @@ changelogs = collections.defaultdict(dict) manifestlogs = collections.defaultdict(dict) - for u, (t, s) in files: + for u, s in files: if u.startswith(b'00changelog'): name, ext = _split_revlog_ext(u) - changelogs[name][ext] = (t, s) + changelogs[name][ext] = s elif u.startswith(b'00manifest'): name, ext = _split_revlog_ext(u) - manifestlogs[name][ext] = (t, s) + manifestlogs[name][ext] = s else: yield SimpleStoreEntry( entry_path=u, - is_volatile=bool(t & FILEFLAGS_VOLATILE), + is_volatile=False, file_size=s, ) # yield manifest before changelog @@ -853,14 +842,11 @@ assert len(changelogs) <= 1 for data, revlog_type in top_rl: for revlog, details in sorted(data.items()): - file_details = {} - for ext, (t, size) in details.items(): - file_details[ext] = size yield RevlogStoreEntry( path_prefix=revlog, revlog_type=revlog_type, target_id=b'', - details=file_details, + details=details, ) def walk( @@ -1083,7 +1069,7 @@ if ( mode not in (b'r', b'rb') and (path.startswith(b'data/') or path.startswith(b'meta/')) - and revlog_type(path) is not None + and is_revlog_file(path) ): # do not trigger a fncache load when adding a file that already is # known to exist. @@ -1136,14 +1122,12 @@ def data_entries( self, matcher=None, undecodable=None ) -> Generator[BaseStoreEntry, None, None]: - files = ((f, revlog_type(f)) for f in self.fncache) # Note: all files in fncache should be revlog related, However the # fncache might contains such file added by previous version of # Mercurial. - files = (f for f in files if f[1] is not None) + files = ((f, None) for f in self.fncache if is_revlog_file(f)) by_revlog = _gather_revlog(files) for revlog, details in by_revlog: - file_details = {} if revlog.startswith(b'data/'): rl_type = FILEFLAGS_FILELOG revlog_target_id = revlog.split(b'/', 1)[1] @@ -1155,13 +1139,11 @@ else: # unreachable assert False, revlog - for ext in details: - file_details[ext] = None entry = RevlogStoreEntry( path_prefix=revlog, revlog_type=rl_type, target_id=revlog_target_id, - details=file_details, + details=details, ) if _match_tracked_entry(entry, matcher): yield entry
--- a/mercurial/upgrade_utils/engine.py Tue May 30 16:38:13 2023 +0100 +++ b/mercurial/upgrade_utils/engine.py Tue May 30 17:43:59 2023 +0100 @@ -370,7 +370,7 @@ are cloned""" for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)): # don't copy revlogs as they are already cloned - if store.revlog_type(path) is not None: + if store.is_revlog_file(path): continue # Skip transaction related files. if path.startswith(b'undo'):