# HG changeset patch # User Pierre-Yves David # Date 1622239916 -7200 # Node ID 33d6269103745797207bcd101eb5fc2dbe3272a2 # Parent 65b86f516ba2618b49284d6f4ff4108b6f5dc1b9 revlog: move censoring code in a dedicated module This code is quite specific and we are about to add more of it for revlog-v2 (and other derived version). So we move this code in a dedicated module in `mercurial/revlogutils/`. This looks like a good fit. The diff is huge because I used `hg copy` to create the new file so that we keep the history of the censor code. Differential Revision: https://phab.mercurial-scm.org/D10789 diff -r 65b86f516ba2 -r 33d626910374 mercurial/revlog.py --- a/mercurial/revlog.py Sat May 29 00:11:32 2021 +0200 +++ b/mercurial/revlog.py Sat May 29 00:11:56 2021 +0200 @@ -80,6 +80,7 @@ util as interfaceutil, ) from .revlogutils import ( + censor, deltas as deltautil, docket as docketutil, flagutil, @@ -3232,88 +3233,15 @@ _(b'cannot censor with version %d revlogs') % self._format_version ) - - censorrev = self.rev(censornode) - tombstone = storageutil.packmeta({b'censored': tombstone}, b'') - - if len(tombstone) > self.rawsize(censorrev): - raise error.Abort( - _(b'censor tombstone must be no longer than censored data') + elif self._format_version == REVLOGV1: + censor.v1_censor(self, tr, censornode, tombstone) + else: + # revlog v2 + raise error.RevlogError( + _(b'cannot censor with version %d revlogs') + % self._format_version ) - # Rewriting the revlog in place is hard. Our strategy for censoring is - # to create a new revlog, copy all revisions to it, then replace the - # revlogs on transaction close. - # - # This is a bit dangerous. We could easily have a mismatch of state. 
- newrl = revlog( - self.opener, - target=self.target, - radix=self.radix, - postfix=b'tmpcensored', - censorable=True, - ) - newrl._format_version = self._format_version - newrl._format_flags = self._format_flags - newrl._generaldelta = self._generaldelta - newrl._parse_index = self._parse_index - - for rev in self.revs(): - node = self.node(rev) - p1, p2 = self.parents(node) - - if rev == censorrev: - newrl.addrawrevision( - tombstone, - tr, - self.linkrev(censorrev), - p1, - p2, - censornode, - REVIDX_ISCENSORED, - ) - - if newrl.deltaparent(rev) != nullrev: - raise error.Abort( - _( - b'censored revision stored as delta; ' - b'cannot censor' - ), - hint=_( - b'censoring of revlogs is not ' - b'fully implemented; please report ' - b'this bug' - ), - ) - continue - - if self.iscensored(rev): - if self.deltaparent(rev) != nullrev: - raise error.Abort( - _( - b'cannot censor due to censored ' - b'revision having delta stored' - ) - ) - rawtext = self._chunk(rev) - else: - rawtext = self.rawdata(rev) - - newrl.addrawrevision( - rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev) - ) - - tr.addbackup(self._indexfile, location=b'store') - if not self._inline: - tr.addbackup(self._datafile, location=b'store') - - self.opener.rename(newrl._indexfile, self._indexfile) - if not self._inline: - self.opener.rename(newrl._datafile, self._datafile) - - self.clearcaches() - self._loadindex() - def verifyintegrity(self, state): """Verifies the integrity of the revlog. diff -r 65b86f516ba2 -r 33d626910374 mercurial/revlogutils/censor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/revlogutils/censor.py Sat May 29 00:11:56 2021 +0200 @@ -0,0 +1,102 @@ +# censor code related to censoring revision +# +# Copyright 2021 Pierre-Yves David +# Copyright 2015 Google, Inc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. 
from ..node import (
    nullrev,
)
from ..i18n import _
from .. import (
    error,
)
from ..utils import (
    storageutil,
)
from . import constants


def v1_censor(rl, tr, censornode, tombstone=b''):
    """Replace the content of `censornode` with a tombstone in a v1 revlog.

    The revlog is not edited in place. Every revision of `rl` is copied into
    a temporary revlog (created with the ``tmpcensored`` postfix), the
    revision being censored is swapped for the packed tombstone on the way,
    and the temporary files are finally renamed over the original ones.

    `rl` is the revlog to censor, `tr` the transaction handed to every write
    and used to register the backups, `censornode` the node to censor and
    `tombstone` the payload stored under the ``censored`` metadata key.

    Raises `error.Abort` when the packed tombstone is longer than
    `rl.rawsize()` of the censored revision, or when a censored revision
    (the new one or a pre-existing one) turns out to be stored as a delta.
    """
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported lazily to avoid an import cycle
    from .. import revlog

    target_rev = rl.rev(censornode)
    packed = storageutil.packmeta({b'censored': tombstone}, b'')

    if len(packed) > rl.rawsize(target_rev):
        msg = _(b'censor tombstone must be no longer than censored data')
        raise error.Abort(msg)

    # In-place rewriting of a revlog is hard, so the approach is: build a
    # fresh revlog, replay all revisions into it, then swap the files in.
    #
    # This is somewhat dangerous, as the states could easily get out of sync.
    scratch = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # make the temporary revlog use the same format as the source
    scratch._format_version = rl._format_version
    scratch._format_flags = rl._format_flags
    scratch._generaldelta = rl._generaldelta
    scratch._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev != target_rev:
            # ordinary revision: copy it over unchanged
            if not rl.iscensored(rev):
                rawtext = rl.rawdata(rev)
            elif rl.deltaparent(rev) == nullrev:
                # already censored: take the stored chunk as is
                rawtext = rl._chunk(rev)
            else:
                msg = _(
                    b'cannot censor due to censored revision '
                    b'having delta stored'
                )
                raise error.Abort(msg)

            scratch.addrawrevision(
                rawtext,
                tr,
                rl.linkrev(rev),
                p1,
                p2,
                node,
                rl.flags(rev),
            )
            continue

        # the revision being censored: store the tombstone in its place
        scratch.addrawrevision(
            packed,
            tr,
            rl.linkrev(target_rev),
            p1,
            p2,
            censornode,
            constants.REVIDX_ISCENSORED,
        )

        # the tombstone must not have been written as a delta
        if scratch.deltaparent(rev) != nullrev:
            msg = _(b'censored revision stored as delta; cannot censor')
            hint = _(
                b'censoring of revlogs is not fully implemented; '
                b'please report this bug'
            )
            raise error.Abort(msg, hint=hint)

    # register backups of the current files before they get overwritten
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    # move the rewritten files over the original ones
    rl.opener.rename(scratch._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(scratch._datafile, rl._datafile)

    # drop cached state and reload the index from the new files
    rl.clearcaches()
    rl._loadindex()