changeset 47391:33d626910374

revlog: move censoring code into a dedicated module

This code is quite specific and we are about to add more of it for revlog-v2 (and other derived versions). So we move this code into a dedicated module in `mercurial/revlogutils/`. This looks like a good fit.

The diff is huge because I used `hg copy` to create the new file so that we keep the history of the censor code.

Differential Revision: https://phab.mercurial-scm.org/D10789
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 29 May 2021 00:11:56 +0200
parents 65b86f516ba2
children 8089d0fa8400
files mercurial/revlog.py mercurial/revlogutils/censor.py
diffstat 2 files changed, 110 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlog.py	Sat May 29 00:11:32 2021 +0200
+++ b/mercurial/revlog.py	Sat May 29 00:11:56 2021 +0200
@@ -80,6 +80,7 @@
     util as interfaceutil,
 )
 from .revlogutils import (
+    censor,
     deltas as deltautil,
     docket as docketutil,
     flagutil,
@@ -3232,88 +3233,15 @@
                 _(b'cannot censor with version %d revlogs')
                 % self._format_version
             )
-
-        censorrev = self.rev(censornode)
-        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
-        if len(tombstone) > self.rawsize(censorrev):
-            raise error.Abort(
-                _(b'censor tombstone must be no longer than censored data')
+        elif self._format_version == REVLOGV1:
+            censor.v1_censor(self, tr, censornode, tombstone)
+        else:
+            # revlog v2
+            raise error.RevlogError(
+                _(b'cannot censor with version %d revlogs')
+                % self._format_version
             )
 
-        # Rewriting the revlog in place is hard. Our strategy for censoring is
-        # to create a new revlog, copy all revisions to it, then replace the
-        # revlogs on transaction close.
-        #
-        # This is a bit dangerous. We could easily have a mismatch of state.
-        newrl = revlog(
-            self.opener,
-            target=self.target,
-            radix=self.radix,
-            postfix=b'tmpcensored',
-            censorable=True,
-        )
-        newrl._format_version = self._format_version
-        newrl._format_flags = self._format_flags
-        newrl._generaldelta = self._generaldelta
-        newrl._parse_index = self._parse_index
-
-        for rev in self.revs():
-            node = self.node(rev)
-            p1, p2 = self.parents(node)
-
-            if rev == censorrev:
-                newrl.addrawrevision(
-                    tombstone,
-                    tr,
-                    self.linkrev(censorrev),
-                    p1,
-                    p2,
-                    censornode,
-                    REVIDX_ISCENSORED,
-                )
-
-                if newrl.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'censored revision stored as delta; '
-                            b'cannot censor'
-                        ),
-                        hint=_(
-                            b'censoring of revlogs is not '
-                            b'fully implemented; please report '
-                            b'this bug'
-                        ),
-                    )
-                continue
-
-            if self.iscensored(rev):
-                if self.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'cannot censor due to censored '
-                            b'revision having delta stored'
-                        )
-                    )
-                rawtext = self._chunk(rev)
-            else:
-                rawtext = self.rawdata(rev)
-
-            newrl.addrawrevision(
-                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
-            )
-
-        tr.addbackup(self._indexfile, location=b'store')
-        if not self._inline:
-            tr.addbackup(self._datafile, location=b'store')
-
-        self.opener.rename(newrl._indexfile, self._indexfile)
-        if not self._inline:
-            self.opener.rename(newrl._datafile, self._datafile)
-
-        self.clearcaches()
-        self._loadindex()
-
     def verifyintegrity(self, state):
         """Verifies the integrity of the revlog.
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/revlogutils/censor.py	Sat May 29 00:11:56 2021 +0200
@@ -0,0 +1,102 @@
+# censor.py - code related to censoring revisions
+#
+# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
+# Copyright 2015 Google, Inc <martinvonz@google.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from ..node import (
+    nullrev,
+)
+from ..i18n import _
+from .. import (
+    error,
+)
+from ..utils import (
+    storageutil,
+)
+from . import constants
+
+
+def v1_censor(rl, tr, censornode, tombstone=b''):
+    """censors a revision in a "version 1" revlog"""
+    assert rl._format_version == constants.REVLOGV1, rl._format_version
+
+    # avoid cycle
+    from .. import revlog
+
+    censorrev = rl.rev(censornode)
+    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
+
+    if len(tombstone) > rl.rawsize(censorrev):
+        raise error.Abort(
+            _(b'censor tombstone must be no longer than censored data')
+        )
+
+    # Rewriting the revlog in place is hard. Our strategy for censoring is
+    # to create a new revlog, copy all revisions to it, then replace the
+    # revlogs on transaction close.
+    #
+    # This is a bit dangerous. We could easily have a mismatch of state.
+    newrl = revlog.revlog(
+        rl.opener,
+        target=rl.target,
+        radix=rl.radix,
+        postfix=b'tmpcensored',
+        censorable=True,
+    )
+    newrl._format_version = rl._format_version
+    newrl._format_flags = rl._format_flags
+    newrl._generaldelta = rl._generaldelta
+    newrl._parse_index = rl._parse_index
+
+    for rev in rl.revs():
+        node = rl.node(rev)
+        p1, p2 = rl.parents(node)
+
+        if rev == censorrev:
+            newrl.addrawrevision(
+                tombstone,
+                tr,
+                rl.linkrev(censorrev),
+                p1,
+                p2,
+                censornode,
+                constants.REVIDX_ISCENSORED,
+            )
+
+            if newrl.deltaparent(rev) != nullrev:
+                m = _(b'censored revision stored as delta; cannot censor')
+                h = _(
+                    b'censoring of revlogs is not fully implemented;'
+                    b' please report this bug'
+                )
+                raise error.Abort(m, hint=h)
+            continue
+
+        if rl.iscensored(rev):
+            if rl.deltaparent(rev) != nullrev:
+                m = _(
+                    b'cannot censor due to censored '
+                    b'revision having delta stored'
+                )
+                raise error.Abort(m)
+            rawtext = rl._chunk(rev)
+        else:
+            rawtext = rl.rawdata(rev)
+
+        newrl.addrawrevision(
+            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
+        )
+
+    tr.addbackup(rl._indexfile, location=b'store')
+    if not rl._inline:
+        tr.addbackup(rl._datafile, location=b'store')
+
+    rl.opener.rename(newrl._indexfile, rl._indexfile)
+    if not rl._inline:
+        rl.opener.rename(newrl._datafile, rl._datafile)
+
+    rl.clearcaches()
+    rl._loadindex()