Mercurial > hg
changeset 47816:32e21ac3adb1 stable
repair: improve performance of detection of revisions affected by issue6528
Explanations inside the patch. I've tested this on Mozilla-Central and it's
5 times faster than the naive approach on my laptop.
Differential Revision: https://phab.mercurial-scm.org/D11262
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 05 Aug 2021 17:00:03 +0200 |
parents | b30a53ffbf9b |
children | 855463b5fe49 |
files | mercurial/revlogutils/rewrite.py |
diffstat | 1 files changed, 57 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revlogutils/rewrite.py Tue Jul 27 21:45:27 2021 +0200 +++ b/mercurial/revlogutils/rewrite.py Thu Aug 05 17:00:03 2021 +0200 @@ -10,6 +10,7 @@ import binascii import contextlib import os +import struct from ..node import ( nullrev, @@ -561,7 +562,7 @@ util.tryunlink(new_file_path) -def _is_revision_affected(ui, fl, filerev, path): +def _is_revision_affected(fl, filerev, metadata_cache=None): """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a special meaning compared to the reverse in the context of filelog-based copytracing. issue6528 exists because new code assumed that parent ordering @@ -574,6 +575,8 @@ # We don't care about censored nodes as they never carry metadata return False has_meta = raw_text.startswith(b'\x01\n') + if metadata_cache is not None: + metadata_cache[filerev] = has_meta if has_meta: (p1, p2) = fl.parentrevs(filerev) if p1 != nullrev and p2 == nullrev: @@ -581,6 +584,54 @@ return False +def _is_revision_affected_fast(repo, fl, filerev, metadata_cache): + """Optimization fast-path for `_is_revision_affected`. + + `metadata_cache` is a dict of `{rev: has_metadata}` which allows any + revision to check if its base has metadata, saving computation of the full + text, instead looking at the current delta. + + This optimization only works if the revisions are looked at in order.""" + rl = fl._revlog + + if rl.iscensored(filerev): + # Censored revisions don't contain metadata, so they cannot be affected + metadata_cache[filerev] = False + return False + + p1, p2 = rl.parentrevs(filerev) + if p1 == nullrev or p2 != nullrev: + return False + + delta_parent = rl.deltaparent(filerev) + parent_has_metadata = metadata_cache.get(delta_parent) + if parent_has_metadata is None: + is_affected = _is_revision_affected(fl, filerev, metadata_cache) + return is_affected + + chunk = rl._chunk(filerev) + if not len(chunk): + # No diff for this revision + return parent_has_metadata + + header_length = 12 + if len(chunk) < header_length: + raise error.Abort(_(b"patch cannot be decoded")) + + start, _end, _length = struct.unpack(b">lll", chunk[:header_length]) + + if start < 2: # len(b'\x01\n') == 2 + # This delta does *something* to the metadata marker (if any). + # Check it the slow way + is_affected = _is_revision_affected(fl, filerev, metadata_cache) + return is_affected + + # The diff did not remove or add the metadata header, it's then in the same + # situation as its parent + metadata_cache[filerev] = parent_has_metadata + return parent_has_metadata + + def _from_report(ui, repo, context, from_report, dry_run): """ Fix the revisions given in the `from_report` file, but still checks if the @@ -603,7 +654,7 @@ excluded = set() for filerev in to_fix: - if _is_revision_affected(ui, fl, filerev, filename): + if _is_revision_affected(fl, filerev): msg = b"found affected revision %d for filelog '%s'\n" ui.warn(msg % (filerev, filename)) else: @@ -663,11 +714,11 @@ # Set of filerevs (or hex filenodes if `to_report`) that need fixing to_fix = set() + metadata_cache = {} for filerev in fl.revs(): - # TODO speed up by looking at the start of the delta - # If it hasn't changed, it's not worth looking at the other revs - # in the same chain - affected = _is_revision_affected(ui, fl, filerev, path) + affected = _is_revision_affected_fast( + repo, fl, filerev, metadata_cache + ) if affected: msg = b"found affected revision %d for filelog '%s'\n" ui.warn(msg % (filerev, path))