Mercurial > hg
changeset 47815:b30a53ffbf9b stable
debugcommands: introduce a debug command to repair repos affected by issue6528
This command is quite basic and slow: it loops over the entirety of the
filelogs in the repository, checks each revision for corruption, then fixes
the affected filelogs. It takes under 25 minutes for Mozilla-Central on my
not-top-of-the-line laptop; using the `--to-report` and `--from-report` options
will make this pretty tolerable to use, I think.
This change also introduces a test for the fix.
Differential Revision: https://phab.mercurial-scm.org/D11239
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 27 Jul 2021 21:45:27 +0200 |
parents | 3b04e4746020 |
children | 32e21ac3adb1 |
files | mercurial/debugcommands.py mercurial/revlogutils/rewrite.py tests/bundles/issue6528.tar tests/test-completion.t tests/test-help.t tests/test-issue6528.t |
diffstat | 6 files changed, 534 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/debugcommands.py Mon Aug 09 19:49:57 2021 -0400 +++ b/mercurial/debugcommands.py Tue Jul 27 21:45:27 2021 +0200 @@ -71,6 +71,7 @@ registrar, repair, repoview, + requirements, revlog, revset, revsetlang, @@ -105,6 +106,7 @@ from .revlogutils import ( deltas as deltautil, nodemap, + rewrite, sidedata, ) @@ -1451,6 +1453,63 @@ ui.write(b"%s\n" % f) +@command( + b"debug-repair-issue6528", + [ + ( + b'', + b'to-report', + b'', + _(b'build a report of affected revisions to this file'), + _(b'FILE'), + ), + ( + b'', + b'from-report', + b'', + _(b'repair revisions listed in this report file'), + _(b'FILE'), + ), + ] + + cmdutil.dryrunopts, +) +def debug_repair_issue6528(ui, repo, **opts): + """find affected revisions and repair them. See issue6528 for more details. + + The `--to-report` and `--from-report` flags allow you to cache and reuse the + computation of affected revisions for a given repository across clones. + The report format is line-based (with empty lines ignored): + + ``` + <ascii-hex of the affected revision>,... <unencoded filelog index filename> + ``` + + There can be multiple broken revisions per filelog, they are separated by + a comma with no spaces. The only space is between the revision(s) and the + filename. + + Note that this does *not* mean that this repairs future affected revisions, + that needs a separate fix at the exchange level that hasn't been written yet + (as of 5.9rc0). + """ + cmdutil.check_incompatible_arguments( + opts, 'to_report', ['from_report', 'dry_run'] + ) + dry_run = opts.get('dry_run') + to_report = opts.get('to_report') + from_report = opts.get('from_report') + # TODO maybe add filelog pattern and revision pattern parameters to help + # narrow down the search for users that know what they're looking for? 
+ + if requirements.REVLOGV1_REQUIREMENT not in repo.requirements: + msg = b"can only repair revlogv1 repositories, v2 is not affected" + raise error.Abort(_(msg)) + + rewrite.repair_issue6528( + ui, repo, dry_run=dry_run, to_report=to_report, from_report=from_report + ) + + @command(b'debugformat', [] + cmdutil.formatteropts) def debugformat(ui, repo, **opts): """display format information about the current repository
--- a/mercurial/revlogutils/rewrite.py Mon Aug 09 19:49:57 2021 -0400 +++ b/mercurial/revlogutils/rewrite.py Tue Jul 27 21:45:27 2021 +0200 @@ -7,6 +7,7 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. +import binascii import contextlib import os @@ -472,3 +473,224 @@ new_index_file.write(entry_bin) revlog._docket.index_end = new_index_file.tell() revlog._docket.data_end = new_data_file.tell() + + +def _get_filename_from_filelog_index(path): + # Drop the extension and the `data/` prefix + path_part = path.rsplit(b'.', 1)[0].split(b'/', 1) + if len(path_part) < 2: + msg = _(b"cannot recognize filelog from filename: '%s'") + msg %= path + raise error.Abort(msg) + + return path_part[1] + + +def _filelog_from_filename(repo, path): + """Returns the filelog for the given `path`. Stolen from `engine.py`""" + + from .. import filelog # avoid cycle + + fl = filelog.filelog(repo.svfs, path) + return fl + + +def _write_swapped_parents(repo, rl, rev, offset, fp): + """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`""" + from ..pure import parsers # avoid cycle + + if repo._currentlock(repo._lockref) is None: + # Let's be paranoid about it + msg = "repo needs to be locked to rewrite parents" + raise error.ProgrammingError(msg) + + index_format = parsers.IndexObject.index_format + entry = rl.index[rev] + new_entry = list(entry) + new_entry[5], new_entry[6] = entry[6], entry[5] + packed = index_format.pack(*new_entry[:8]) + fp.seek(offset) + fp.write(packed) + + +def _reorder_filelog_parents(repo, fl, to_fix): + """ + Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the + new version to disk, overwriting the old one with a rename. 
+ """ + from ..pure import parsers # avoid cycle + + ui = repo.ui + assert len(to_fix) > 0 + rl = fl._revlog + if rl._format_version != constants.REVLOGV1: + msg = "expected version 1 revlog, got version '%d'" % rl._format_version + raise error.ProgrammingError(msg) + + index_file = rl._indexfile + new_file_path = index_file + b'.tmp-parents-fix' + repaired_msg = _(b"repaired revision %d of 'filelog %s'\n") + + with ui.uninterruptible(): + try: + util.copyfile( + rl.opener.join(index_file), + rl.opener.join(new_file_path), + checkambig=rl._checkambig, + ) + + with rl.opener(new_file_path, mode=b"r+") as fp: + if rl._inline: + index = parsers.InlinedIndexObject(fp.read()) + for rev in fl.revs(): + if rev in to_fix: + offset = index._calculate_index(rev) + _write_swapped_parents(repo, rl, rev, offset, fp) + ui.write(repaired_msg % (rev, index_file)) + else: + index_format = parsers.IndexObject.index_format + for rev in to_fix: + offset = rev * index_format.size + _write_swapped_parents(repo, rl, rev, offset, fp) + ui.write(repaired_msg % (rev, index_file)) + + rl.opener.rename(new_file_path, index_file) + rl.clearcaches() + rl._loadindex() + finally: + util.tryunlink(new_file_path) + + +def _is_revision_affected(ui, fl, filerev, path): + """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a + special meaning compared to the reverse in the context of filelog-based + copytracing. 
issue6528 exists because new code assumed that parent ordering + didn't matter, so this detects if the revision contains metadata (since + it's only used for filelog-based copytracing) and its parents are in the + "wrong" order.""" + try: + raw_text = fl.rawdata(filerev) + except error.CensoredNodeError: + # We don't care about censored nodes as they never carry metadata + return False + has_meta = raw_text.startswith(b'\x01\n') + if has_meta: + (p1, p2) = fl.parentrevs(filerev) + if p1 != nullrev and p2 == nullrev: + return True + return False + + +def _from_report(ui, repo, context, from_report, dry_run): + """ + Fix the revisions given in the `from_report` file, but still checks if the + revisions are indeed affected to prevent an unfortunate cyclic situation + where we'd swap well-ordered parents again. + + See the doc for `debug_fix_issue6528` for the format documentation. + """ + ui.write(_(b"loading report file '%s'\n") % from_report) + + with context(), open(from_report, mode='rb') as f: + for line in f.read().split(b'\n'): + if not line: + continue + filenodes, filename = line.split(b' ', 1) + fl = _filelog_from_filename(repo, filename) + to_fix = set( + fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',') + ) + excluded = set() + + for filerev in to_fix: + if _is_revision_affected(ui, fl, filerev, filename): + msg = b"found affected revision %d for filelog '%s'\n" + ui.warn(msg % (filerev, filename)) + else: + msg = _(b"revision %s of file '%s' is not affected\n") + msg %= (binascii.hexlify(fl.node(filerev)), filename) + ui.warn(msg) + excluded.add(filerev) + + to_fix = to_fix - excluded + if not to_fix: + msg = _(b"no affected revisions were found for '%s'\n") + ui.write(msg % filename) + continue + if not dry_run: + _reorder_filelog_parents(repo, fl, sorted(to_fix)) + + +def repair_issue6528(ui, repo, dry_run=False, to_report=None, from_report=None): + from .. 
import store # avoid cycle + + @contextlib.contextmanager + def context(): + if dry_run or to_report: # No need for locking + yield + else: + with repo.wlock(), repo.lock(): + yield + + if from_report: + return _from_report(ui, repo, context, from_report, dry_run) + + report_entries = [] + + with context(): + files = list( + (file_type, path) + for (file_type, path, _e, _s) in repo.store.datafiles() + if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG + ) + + progress = ui.makeprogress( + _(b"looking for affected revisions"), + unit=_(b"filelogs"), + total=len(files), + ) + found_nothing = True + + for file_type, path in files: + if ( + not path.endswith(b'.i') + or not file_type & store.FILEFLAGS_FILELOG + ): + continue + progress.increment() + filename = _get_filename_from_filelog_index(path) + fl = _filelog_from_filename(repo, filename) + + # Set of filerevs (or hex filenodes if `to_report`) that need fixing + to_fix = set() + for filerev in fl.revs(): + # TODO speed up by looking at the start of the delta + # If it hasn't changed, it's not worth looking at the other revs + # in the same chain + affected = _is_revision_affected(ui, fl, filerev, path) + if affected: + msg = b"found affected revision %d for filelog '%s'\n" + ui.warn(msg % (filerev, path)) + found_nothing = False + if not dry_run: + if to_report: + to_fix.add(binascii.hexlify(fl.node(filerev))) + else: + to_fix.add(filerev) + + if to_fix: + to_fix = sorted(to_fix) + if to_report: + report_entries.append((filename, to_fix)) + else: + _reorder_filelog_parents(repo, fl, to_fix) + + if found_nothing: + ui.write(_(b"no affected revisions were found\n")) + + if to_report and report_entries: + with open(to_report, mode="wb") as f: + for path, to_fix in report_entries: + f.write(b"%s %s\n" % (b",".join(to_fix), path)) + + progress.complete()
--- a/tests/test-completion.t Mon Aug 09 19:49:57 2021 -0400 +++ b/tests/test-completion.t Tue Jul 27 21:45:27 2021 +0200 @@ -74,6 +74,7 @@ Show debug commands if there are no other candidates $ hg debugcomplete debug + debug-repair-issue6528 debugancestor debugantivirusrunning debugapplystreamclonebundle @@ -266,6 +267,7 @@ config: untrusted, exp-all-known, edit, local, source, shared, non-shared, global, template continue: dry-run copy: forget, after, at-rev, force, include, exclude, dry-run + debug-repair-issue6528: to-report, from-report, dry-run debugancestor: debugantivirusrunning: debugapplystreamclonebundle:
--- a/tests/test-help.t Mon Aug 09 19:49:57 2021 -0400 +++ b/tests/test-help.t Tue Jul 27 21:45:27 2021 +0200 @@ -975,6 +975,9 @@ $ hg help debug debug commands (internal and unsupported): + debug-repair-issue6528 + find affected revisions and repair them. See issue6528 for more + details. debugancestor find the ancestor revision of two revisions in a given index debugantivirusrunning
--- a/tests/test-issue6528.t Mon Aug 09 19:49:57 2021 -0400 +++ b/tests/test-issue6528.t Tue Jul 27 21:45:27 2021 +0200 @@ -3,7 +3,7 @@ =============================================================== Setup ------ +===== $ hg init base-repo $ cd base-repo @@ -93,7 +93,7 @@ Check the lack of corruption ----------------------------- +============================ $ hg clone --pull base-repo cloned requesting all changes @@ -166,3 +166,249 @@ date: Thu Jan 01 00:00:00 1970 +0000 summary: c_base_c - create a.txt + +Test the command that fixes the issue +===================================== + +Restore a broken repository with multiple broken revisions and a filename that +would get encoded to test the `report` options. +It's a tarball because unbundle might magically fix the issue later. + + $ cd .. + $ mkdir repo-to-fix + $ cd repo-to-fix +#if windows +tar interprets `:` in paths (like `C:`) as being remote, force local on Windows +only since some versions of tar don't have this flag. + + $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar +#else + $ tar xf $TESTDIR/bundles/issue6528.tar +#endif + +Check that the issue is present + $ hg st + M D.txt + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 2a8d3833f2fb 000000000000 + +Dry-run the fix + $ hg debug-repair-issue6528 --dry-run + found affected revision 1 for filelog 'data/D.txt.i' + found affected revision 1 for filelog 'data/b.txt.i' + found affected revision 3 for filelog 'data/b.txt.i' + $ hg st + M D.txt + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 6 216a5fe8b8ed 000000000000 
000000000000 + 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 2a8d3833f2fb 000000000000 + +Run the fix + $ hg debug-repair-issue6528 + found affected revision 1 for filelog 'data/D.txt.i' + repaired revision 1 of 'filelog data/D.txt.i' + found affected revision 1 for filelog 'data/b.txt.i' + found affected revision 3 for filelog 'data/b.txt.i' + repaired revision 1 of 'filelog data/b.txt.i' + repaired revision 3 of 'filelog data/b.txt.i' + +Check that the fix worked and that running it twice does nothing + $ hg st + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 000000000000 2a8d3833f2fb + $ hg debug-repair-issue6528 + no affected revisions were found + $ hg st + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 000000000000 2a8d3833f2fb + +Try the using the report options +-------------------------------- + + $ cd .. + $ mkdir repo-to-fix-report + $ cd repo-to-fix +#if windows +tar interprets `:` in paths (like `C:`) as being remote, force local on Windows +only since some versions of tar don't have this flag. 
+ + $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar +#else + $ tar xf $TESTDIR/bundles/issue6528.tar +#endif + + $ hg debug-repair-issue6528 --to-report $TESTTMP/report.txt + found affected revision 1 for filelog 'data/D.txt.i' + found affected revision 1 for filelog 'data/b.txt.i' + found affected revision 3 for filelog 'data/b.txt.i' + $ cat $TESTTMP/report.txt + 2a80419dfc31d7dfb308ac40f3f138282de7d73b D.txt + a58b36ad6b6545195952793099613c2116f3563b,ea4f2f2463cca5b29ddf3461012b8ce5c6dac175 b.txt + + $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt --dry-run + loading report file '$TESTTMP/report.txt' + found affected revision 1 for filelog 'D.txt' + found affected revision 1 for filelog 'b.txt' + found affected revision 3 for filelog 'b.txt' + $ hg st + M D.txt + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 2a8d3833f2fb 000000000000 + + $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt + loading report file '$TESTTMP/report.txt' + found affected revision 1 for filelog 'D.txt' + repaired revision 1 of 'filelog data/D.txt.i' + found affected revision 1 for filelog 'b.txt' + found affected revision 3 for filelog 'b.txt' + repaired revision 1 of 'filelog data/b.txt.i' + repaired revision 3 of 'filelog data/b.txt.i' + $ hg st + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 000000000000 2a8d3833f2fb + +Check that the 
revision is not "fixed" again + + $ hg debug-repair-issue6528 --from-report $TESTTMP/report.txt + loading report file '$TESTTMP/report.txt' + revision 2a80419dfc31d7dfb308ac40f3f138282de7d73b of file 'D.txt' is not affected + no affected revisions were found for 'D.txt' + revision a58b36ad6b6545195952793099613c2116f3563b of file 'b.txt' is not affected + revision ea4f2f2463cca5b29ddf3461012b8ce5c6dac175 of file 'b.txt' is not affected + no affected revisions were found for 'b.txt' + $ hg st + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 000000000000 2a8d3833f2fb + +Try it with a non-inline revlog +------------------------------- + + $ cd .. + $ mkdir $TESTTMP/ext + $ cat << EOF > $TESTTMP/ext/small_inline.py + > from mercurial import revlog + > revlog._maxinline = 8 + > EOF + + $ cat << EOF >> $HGRCPATH + > [extensions] + > small_inline=$TESTTMP/ext/small_inline.py + > EOF + + $ mkdir repo-to-fix-not-inline + $ cd repo-to-fix-not-inline +#if windows +tar interprets `:` in paths (like `C:`) as being remote, force local on Windows +only since some versions of tar don't have this flag. 
+ + $ tar --force-local -xf $TESTDIR/bundles/issue6528.tar +#else + $ tar xf $TESTDIR/bundles/issue6528.tar +#endif + $ echo b >> b.txt + $ hg commit -qm "inline -> separate" + $ find .hg -name *b.txt.d + .hg/store/data/b.txt.d + +Status is correct, but the problem is still there, in the earlier revision + $ hg st + $ hg up 3 + 1 files updated, 0 files merged, 1 files removed, 0 files unresolved + $ hg st + M b.txt + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 05b806ebe5ea 000000000000 + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 216a5fe8b8ed 000000000000 + 4 8 db234885e2fe ea4f2f2463cc 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 2a8d3833f2fb 000000000000 + 2 8 65aecc89bb5d 2a80419dfc31 000000000000 + +Run the fix on the non-inline revlog + $ hg debug-repair-issue6528 + found affected revision 1 for filelog 'data/D.txt.i' + repaired revision 1 of 'filelog data/D.txt.i' + found affected revision 1 for filelog 'data/b.txt.i' + found affected revision 3 for filelog 'data/b.txt.i' + repaired revision 1 of 'filelog data/b.txt.i' + repaired revision 3 of 'filelog data/b.txt.i' + +Check that it worked + $ hg debugrevlogindex b.txt + rev linkrev nodeid p1 p2 + 0 2 05b806ebe5ea 000000000000 000000000000 + 1 3 a58b36ad6b65 000000000000 05b806ebe5ea + 2 6 216a5fe8b8ed 000000000000 000000000000 + 3 7 ea4f2f2463cc 000000000000 216a5fe8b8ed + 4 8 db234885e2fe ea4f2f2463cc 000000000000 + $ hg debugrevlogindex D.txt + rev linkrev nodeid p1 p2 + 0 6 2a8d3833f2fb 000000000000 000000000000 + 1 7 2a80419dfc31 000000000000 2a8d3833f2fb + 2 8 65aecc89bb5d 2a80419dfc31 000000000000 + $ hg debug-repair-issue6528 + no affected revisions were found + $ hg st