revlog: move censoring code into a dedicated module
This code is quite specific and we are about to add more of it for revlog-v2
(and other derived versions). So we move this code into a dedicated module in
`mercurial/revlogutils/`, which looks like a good fit.
The diff is huge because I used `hg copy` to create the new file so that we keep
the history of the censor code.
Differential Revision: https://phab.mercurial-scm.org/D10789
--- a/mercurial/revlog.py Sat May 29 00:11:32 2021 +0200
+++ b/mercurial/revlog.py Sat May 29 00:11:56 2021 +0200
@@ -80,6 +80,7 @@
util as interfaceutil,
)
from .revlogutils import (
+ censor,
deltas as deltautil,
docket as docketutil,
flagutil,
@@ -3232,88 +3233,15 @@
_(b'cannot censor with version %d revlogs')
% self._format_version
)
-
- censorrev = self.rev(censornode)
- tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
- if len(tombstone) > self.rawsize(censorrev):
- raise error.Abort(
- _(b'censor tombstone must be no longer than censored data')
+ elif self._format_version == REVLOGV1:
+ censor.v1_censor(self, tr, censornode, tombstone)
+ else:
+ # revlog v2
+ raise error.RevlogError(
+ _(b'cannot censor with version %d revlogs')
+ % self._format_version
)
- # Rewriting the revlog in place is hard. Our strategy for censoring is
- # to create a new revlog, copy all revisions to it, then replace the
- # revlogs on transaction close.
- #
- # This is a bit dangerous. We could easily have a mismatch of state.
- newrl = revlog(
- self.opener,
- target=self.target,
- radix=self.radix,
- postfix=b'tmpcensored',
- censorable=True,
- )
- newrl._format_version = self._format_version
- newrl._format_flags = self._format_flags
- newrl._generaldelta = self._generaldelta
- newrl._parse_index = self._parse_index
-
- for rev in self.revs():
- node = self.node(rev)
- p1, p2 = self.parents(node)
-
- if rev == censorrev:
- newrl.addrawrevision(
- tombstone,
- tr,
- self.linkrev(censorrev),
- p1,
- p2,
- censornode,
- REVIDX_ISCENSORED,
- )
-
- if newrl.deltaparent(rev) != nullrev:
- raise error.Abort(
- _(
- b'censored revision stored as delta; '
- b'cannot censor'
- ),
- hint=_(
- b'censoring of revlogs is not '
- b'fully implemented; please report '
- b'this bug'
- ),
- )
- continue
-
- if self.iscensored(rev):
- if self.deltaparent(rev) != nullrev:
- raise error.Abort(
- _(
- b'cannot censor due to censored '
- b'revision having delta stored'
- )
- )
- rawtext = self._chunk(rev)
- else:
- rawtext = self.rawdata(rev)
-
- newrl.addrawrevision(
- rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
- )
-
- tr.addbackup(self._indexfile, location=b'store')
- if not self._inline:
- tr.addbackup(self._datafile, location=b'store')
-
- self.opener.rename(newrl._indexfile, self._indexfile)
- if not self._inline:
- self.opener.rename(newrl._datafile, self._datafile)
-
- self.clearcaches()
- self._loadindex()
-
def verifyintegrity(self, state):
"""Verifies the integrity of the revlog.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/revlogutils/censor.py Sat May 29 00:11:56 2021 +0200
@@ -0,0 +1,102 @@
+# censor.py - code related to censoring revisions
+#
+# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
+# Copyright 2015 Google, Inc <martinvonz@google.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from ..node import (
+ nullrev,
+)
+from ..i18n import _
+from .. import (
+ error,
+)
+from ..utils import (
+ storageutil,
+)
+from . import constants
+
+
+def v1_censor(rl, tr, censornode, tombstone=b''):
+ """Censor `censornode` in the "version 1" revlog `rl`, storing a tombstone in its place."""
+ assert rl._format_version == constants.REVLOGV1, rl._format_version
+
+ # imported here to avoid an import cycle (revlog.py imports this module)
+ from .. import revlog
+
+ censorrev = rl.rev(censornode)
+ tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
+
+ if len(tombstone) > rl.rawsize(censorrev):
+ raise error.Abort(
+ _(b'censor tombstone must be no longer than censored data')
+ )
+
+ # Rewriting the revlog in place is hard. Our strategy for censoring is
+ # to create a new revlog (postfix 'tmpcensored'), copy all revisions to it,
+ # then rename its files over the original ones once backups are registered.
+ #
+ # This is a bit dangerous. We could easily have a mismatch of state.
+ newrl = revlog.revlog(
+ rl.opener,
+ target=rl.target,
+ radix=rl.radix,
+ postfix=b'tmpcensored',
+ censorable=True,
+ )
+ newrl._format_version = rl._format_version
+ newrl._format_flags = rl._format_flags
+ newrl._generaldelta = rl._generaldelta
+ newrl._parse_index = rl._parse_index
+
+ for rev in rl.revs():
+ node = rl.node(rev)
+ p1, p2 = rl.parents(node)
+
+ if rev == censorrev:
+ newrl.addrawrevision(
+ tombstone,
+ tr,
+ rl.linkrev(censorrev),
+ p1,
+ p2,
+ censornode,
+ constants.REVIDX_ISCENSORED,
+ )
+
+ if newrl.deltaparent(rev) != nullrev:
+ m = _(b'censored revision stored as delta; cannot censor')
+ h = _(
+ b'censoring of revlogs is not fully implemented;'
+ b' please report this bug'
+ )
+ raise error.Abort(m, hint=h)
+ continue
+
+ if rl.iscensored(rev):
+ if rl.deltaparent(rev) != nullrev:
+ m = _(
+ b'cannot censor due to censored '
+ b'revision having delta stored'
+ )
+ raise error.Abort(m)
+ rawtext = rl._chunk(rev)
+ else:
+ rawtext = rl.rawdata(rev)
+
+ newrl.addrawrevision(
+ rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
+ )
+
+ tr.addbackup(rl._indexfile, location=b'store')
+ if not rl._inline:
+ tr.addbackup(rl._datafile, location=b'store')
+
+ rl.opener.rename(newrl._indexfile, rl._indexfile)
+ if not rl._inline:
+ rl.opener.rename(newrl._datafile, rl._datafile)
+
+ rl.clearcaches()
+ rl._loadindex()