revlog: rewrite `censor.py` to `rewrite.py`
authorPierre-Yves David <pierre-yves.david@octobus.net>
Tue, 22 Jun 2021 11:09:25 +0200
changeset 47470 bc8536e09a20
parent 47469 5fa083a5ff04
child 47471 f7a94e2d4470
revlog: rewrite `censor.py` to `rewrite.py` The logic we use for censoring revisions will be mostly common with the one we need for stripping. So we rename the module to `rewrite` to better match its future content. Differential Revision: https://phab.mercurial-scm.org/D10896
mercurial/revlog.py
mercurial/revlogutils/censor.py
mercurial/revlogutils/rewrite.py
--- a/mercurial/revlog.py	Thu Jun 17 19:48:25 2021 +0200
+++ b/mercurial/revlog.py	Tue Jun 22 11:09:25 2021 +0200
@@ -81,13 +81,13 @@
     util as interfaceutil,
 )
 from .revlogutils import (
-    censor,
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     randomaccessfile,
     revlogv0,
+    rewrite,
     sidedata as sidedatautil,
 )
 from .utils import (
@@ -3075,9 +3075,9 @@
                 % self._format_version
             )
         elif self._format_version == REVLOGV1:
-            censor.v1_censor(self, tr, censornode, tombstone)
+            rewrite.v1_censor(self, tr, censornode, tombstone)
         else:
-            censor.v2_censor(self, tr, censornode, tombstone)
+            rewrite.v2_censor(self, tr, censornode, tombstone)
 
     def verifyintegrity(self, state):
         """Verifies the integrity of the revlog.
--- a/mercurial/revlogutils/censor.py	Thu Jun 17 19:48:25 2021 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,359 +0,0 @@
-# censor code related to censoring revision
-# coding: utf8
-#
-# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
-# Copyright 2015 Google, Inc <martinvonz@google.com>
-#
-# This software may be used and distributed according to the terms of the
-# GNU General Public License version 2 or any later version.
-
-import contextlib
-import os
-
-from ..node import (
-    nullrev,
-)
-from .constants import (
-    COMP_MODE_PLAIN,
-    ENTRY_DATA_COMPRESSED_LENGTH,
-    ENTRY_DATA_COMPRESSION_MODE,
-    ENTRY_DATA_OFFSET,
-    ENTRY_DATA_UNCOMPRESSED_LENGTH,
-    ENTRY_DELTA_BASE,
-    ENTRY_LINK_REV,
-    ENTRY_NODE_ID,
-    ENTRY_PARENT_1,
-    ENTRY_PARENT_2,
-    ENTRY_SIDEDATA_COMPRESSED_LENGTH,
-    ENTRY_SIDEDATA_COMPRESSION_MODE,
-    ENTRY_SIDEDATA_OFFSET,
-    REVLOGV0,
-    REVLOGV1,
-)
-from ..i18n import _
-
-from .. import (
-    error,
-    pycompat,
-    revlogutils,
-    util,
-)
-from ..utils import (
-    storageutil,
-)
-from . import (
-    constants,
-    deltas,
-)
-
-
-def v1_censor(rl, tr, censornode, tombstone=b''):
-    """censors a revision in a "version 1" revlog"""
-    assert rl._format_version == constants.REVLOGV1, rl._format_version
-
-    # avoid cycle
-    from .. import revlog
-
-    censorrev = rl.rev(censornode)
-    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
-    # Rewriting the revlog in place is hard. Our strategy for censoring is
-    # to create a new revlog, copy all revisions to it, then replace the
-    # revlogs on transaction close.
-    #
-    # This is a bit dangerous. We could easily have a mismatch of state.
-    newrl = revlog.revlog(
-        rl.opener,
-        target=rl.target,
-        radix=rl.radix,
-        postfix=b'tmpcensored',
-        censorable=True,
-    )
-    newrl._format_version = rl._format_version
-    newrl._format_flags = rl._format_flags
-    newrl._generaldelta = rl._generaldelta
-    newrl._parse_index = rl._parse_index
-
-    for rev in rl.revs():
-        node = rl.node(rev)
-        p1, p2 = rl.parents(node)
-
-        if rev == censorrev:
-            newrl.addrawrevision(
-                tombstone,
-                tr,
-                rl.linkrev(censorrev),
-                p1,
-                p2,
-                censornode,
-                constants.REVIDX_ISCENSORED,
-            )
-
-            if newrl.deltaparent(rev) != nullrev:
-                m = _(b'censored revision stored as delta; cannot censor')
-                h = _(
-                    b'censoring of revlogs is not fully implemented;'
-                    b' please report this bug'
-                )
-                raise error.Abort(m, hint=h)
-            continue
-
-        if rl.iscensored(rev):
-            if rl.deltaparent(rev) != nullrev:
-                m = _(
-                    b'cannot censor due to censored '
-                    b'revision having delta stored'
-                )
-                raise error.Abort(m)
-            rawtext = rl._chunk(rev)
-        else:
-            rawtext = rl.rawdata(rev)
-
-        newrl.addrawrevision(
-            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
-        )
-
-    tr.addbackup(rl._indexfile, location=b'store')
-    if not rl._inline:
-        tr.addbackup(rl._datafile, location=b'store')
-
-    rl.opener.rename(newrl._indexfile, rl._indexfile)
-    if not rl._inline:
-        rl.opener.rename(newrl._datafile, rl._datafile)
-
-    rl.clearcaches()
-    rl._loadindex()
-
-
-def v2_censor(rl, tr, censornode, tombstone=b''):
-    """censors a revision in a "version 2" revlog"""
-    # General principle
-    #
-    # We create new revlog files (index/data/sidedata) to copy the content of
-    # the existing data without the censored data.
-    #
-    # We need to recompute new delta for any revision that used the censored
-    # revision as delta base. As the cumulative size of the new delta may be
-    # large, we store them in a temporary file until they are stored in their
-    # final destination.
-    #
-    # All data before the censored data can be blindly copied. The rest needs
-    # to be copied as we go and the associated index entry needs adjustement.
-
-    assert rl._format_version != REVLOGV0, rl._format_version
-    assert rl._format_version != REVLOGV1, rl._format_version
-
-    old_index = rl.index
-    docket = rl._docket
-
-    censor_rev = rl.rev(censornode)
-    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
-    censored_entry = rl.index[censor_rev]
-    index_cutoff = rl.index.entry_size * censor_rev
-    data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
-    sidedata_cutoff = rl.sidedata_cut_off(censor_rev)
-
-    # rev → (new_base, data_start, data_end)
-    rewritten_entries = {}
-
-    dc = deltas.deltacomputer(rl)
-    excl = [censor_rev]
-
-    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
-        with rl._segmentfile._open_read() as dfh:
-            for rev in range(censor_rev + 1, len(old_index)):
-                entry = old_index[rev]
-                if censor_rev != entry[ENTRY_DELTA_BASE]:
-                    continue
-                # This is a revision that use the censored revision as the base
-                # for its delta. We need a need new deltas
-                if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
-                    # this revision is empty, we can delta against nullrev
-                    rewritten_entries[rev] = (nullrev, 0, 0)
-                else:
-
-                    text = rl.rawdata(rev, _df=dfh)
-                    info = revlogutils.revisioninfo(
-                        node=entry[ENTRY_NODE_ID],
-                        p1=rl.node(entry[ENTRY_PARENT_1]),
-                        p2=rl.node(entry[ENTRY_PARENT_2]),
-                        btext=[text],
-                        textlen=len(text),
-                        cachedelta=None,
-                        flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
-                    )
-                    d = dc.finddeltainfo(
-                        info, dfh, excluded_bases=excl, target_rev=rev
-                    )
-                    default_comp = rl._docket.default_compression_header
-                    comp_mode, d = deltas.delta_compression(default_comp, d)
-                    # using `tell` is a bit lazy, but we are not here for speed
-                    start = tmp_storage.tell()
-                    tmp_storage.write(d.data[1])
-                    end = tmp_storage.tell()
-                    rewritten_entries[rev] = (d.base, start, end, comp_mode)
-
-        old_index_filepath = rl.opener.join(docket.index_filepath())
-        old_data_filepath = rl.opener.join(docket.data_filepath())
-        old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath())
-
-        new_index_filepath = rl.opener.join(docket.new_index_file())
-        new_data_filepath = rl.opener.join(docket.new_data_file())
-        new_sidedata_filepath = rl.opener.join(docket.new_sidedata_file())
-
-        util.copyfile(
-            old_index_filepath, new_index_filepath, nb_bytes=index_cutoff
-        )
-        util.copyfile(
-            old_data_filepath, new_data_filepath, nb_bytes=data_cutoff
-        )
-        util.copyfile(
-            old_sidedata_filepath,
-            new_sidedata_filepath,
-            nb_bytes=sidedata_cutoff,
-        )
-        rl.opener.register_file(docket.index_filepath())
-        rl.opener.register_file(docket.data_filepath())
-        rl.opener.register_file(docket.sidedata_filepath())
-
-        docket.index_end = index_cutoff
-        docket.data_end = data_cutoff
-        docket.sidedata_end = sidedata_cutoff
-
-        # reload the revlog internal information
-        rl.clearcaches()
-        rl._loadindex(docket=docket)
-
-        @contextlib.contextmanager
-        def all_files():
-            # hide opening in an helper function to please check-code, black
-            # and various python ersion at the same time
-            with open(old_data_filepath, 'rb') as old_data_file:
-                with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
-                    with open(new_index_filepath, 'r+b') as new_index_file:
-                        with open(new_data_filepath, 'r+b') as new_data_file:
-                            with open(
-                                new_sidedata_filepath, 'r+b'
-                            ) as new_sidedata_file:
-                                yield (
-                                    old_data_file,
-                                    old_sidedata_file,
-                                    new_index_file,
-                                    new_data_file,
-                                    new_sidedata_file,
-                                )
-
-        # we dont need to open the old index file since its content already
-        # exist in a usable form in `old_index`.
-        with all_files() as (
-            old_data_file,
-            old_sidedata_file,
-            new_index_file,
-            new_data_file,
-            new_sidedata_file,
-        ):
-            new_index_file.seek(0, os.SEEK_END)
-            assert new_index_file.tell() == index_cutoff
-            new_data_file.seek(0, os.SEEK_END)
-            assert new_data_file.tell() == data_cutoff
-            new_sidedata_file.seek(0, os.SEEK_END)
-            assert new_sidedata_file.tell() == sidedata_cutoff
-
-            ### writing the censored revision
-            entry = old_index[censor_rev]
-
-            # XXX consider trying the default compression too
-            new_data_size = len(tombstone)
-            new_data_offset = new_data_file.tell()
-            new_data_file.write(tombstone)
-
-            # we are not adding any sidedata as they might leak info about the censored version
-
-            new_entry = revlogutils.entry(
-                flags=constants.REVIDX_ISCENSORED,
-                data_offset=new_data_offset,
-                data_compressed_length=new_data_size,
-                data_uncompressed_length=new_data_size,
-                data_delta_base=censor_rev,
-                link_rev=entry[ENTRY_LINK_REV],
-                parent_rev_1=entry[ENTRY_PARENT_1],
-                parent_rev_2=entry[ENTRY_PARENT_2],
-                node_id=entry[ENTRY_NODE_ID],
-                sidedata_offset=0,
-                sidedata_compressed_length=0,
-                data_compression_mode=COMP_MODE_PLAIN,
-                sidedata_compression_mode=COMP_MODE_PLAIN,
-            )
-            rl.index.append(new_entry)
-            entry_bin = rl.index.entry_binary(censor_rev)
-            new_index_file.write(entry_bin)
-            docket.index_end = new_index_file.tell()
-            docket.data_end = new_data_file.tell()
-
-            #### Writing all subsequent revisions
-            for rev in range(censor_rev + 1, len(old_index)):
-                entry = old_index[rev]
-                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
-                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
-
-                if rev not in rewritten_entries:
-                    old_data_file.seek(old_data_offset)
-                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
-                    new_data = old_data_file.read(new_data_size)
-                    data_delta_base = entry[ENTRY_DELTA_BASE]
-                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
-                else:
-                    (
-                        data_delta_base,
-                        start,
-                        end,
-                        d_comp_mode,
-                    ) = rewritten_entries[rev]
-                    new_data_size = end - start
-                    tmp_storage.seek(start)
-                    new_data = tmp_storage.read(new_data_size)
-
-                # It might be faster to group continuous read/write operation,
-                # however, this is censor, an operation that is not focussed
-                # around stellar performance. So I have not written this
-                # optimisation yet.
-                new_data_offset = new_data_file.tell()
-                new_data_file.write(new_data)
-
-                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
-                new_sidedata_offset = new_sidedata_file.tell()
-                if 0 < sidedata_size:
-                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
-                    old_sidedata_file.seek(old_sidedata_offset)
-                    new_sidedata = old_sidedata_file.read(sidedata_size)
-                    new_sidedata_file.write(new_sidedata)
-
-                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
-                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
-                assert data_delta_base <= rev, (data_delta_base, rev)
-
-                new_entry = revlogutils.entry(
-                    flags=flags,
-                    data_offset=new_data_offset,
-                    data_compressed_length=new_data_size,
-                    data_uncompressed_length=data_uncompressed_length,
-                    data_delta_base=data_delta_base,
-                    link_rev=entry[ENTRY_LINK_REV],
-                    parent_rev_1=entry[ENTRY_PARENT_1],
-                    parent_rev_2=entry[ENTRY_PARENT_2],
-                    node_id=entry[ENTRY_NODE_ID],
-                    sidedata_offset=new_sidedata_offset,
-                    sidedata_compressed_length=sidedata_size,
-                    data_compression_mode=d_comp_mode,
-                    sidedata_compression_mode=sd_com_mode,
-                )
-                rl.index.append(new_entry)
-                entry_bin = rl.index.entry_binary(rev)
-                new_index_file.write(entry_bin)
-
-                docket.index_end = new_index_file.tell()
-                docket.data_end = new_data_file.tell()
-                docket.sidedata_end = new_sidedata_file.tell()
-
-    docket.write(transaction=None, stripping=True)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/revlogutils/rewrite.py	Tue Jun 22 11:09:25 2021 +0200
@@ -0,0 +1,359 @@
+# censor code related to censoring revision
+# coding: utf8
+#
+# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
+# Copyright 2015 Google, Inc <martinvonz@google.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+import contextlib
+import os
+
+from ..node import (
+    nullrev,
+)
+from .constants import (
+    COMP_MODE_PLAIN,
+    ENTRY_DATA_COMPRESSED_LENGTH,
+    ENTRY_DATA_COMPRESSION_MODE,
+    ENTRY_DATA_OFFSET,
+    ENTRY_DATA_UNCOMPRESSED_LENGTH,
+    ENTRY_DELTA_BASE,
+    ENTRY_LINK_REV,
+    ENTRY_NODE_ID,
+    ENTRY_PARENT_1,
+    ENTRY_PARENT_2,
+    ENTRY_SIDEDATA_COMPRESSED_LENGTH,
+    ENTRY_SIDEDATA_COMPRESSION_MODE,
+    ENTRY_SIDEDATA_OFFSET,
+    REVLOGV0,
+    REVLOGV1,
+)
+from ..i18n import _
+
+from .. import (
+    error,
+    pycompat,
+    revlogutils,
+    util,
+)
+from ..utils import (
+    storageutil,
+)
+from . import (
+    constants,
+    deltas,
+)
+
+
+def v1_censor(rl, tr, censornode, tombstone=b''):
+    """censors a revision in a "version 1" revlog"""
+    assert rl._format_version == constants.REVLOGV1, rl._format_version
+
+    # avoid cycle
+    from .. import revlog
+
+    censorrev = rl.rev(censornode)
+    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
+
+    # Rewriting the revlog in place is hard. Our strategy for censoring is
+    # to create a new revlog, copy all revisions to it, then replace the
+    # revlogs on transaction close.
+    #
+    # This is a bit dangerous. We could easily have a mismatch of state.
+    newrl = revlog.revlog(
+        rl.opener,
+        target=rl.target,
+        radix=rl.radix,
+        postfix=b'tmpcensored',
+        censorable=True,
+    )
+    newrl._format_version = rl._format_version
+    newrl._format_flags = rl._format_flags
+    newrl._generaldelta = rl._generaldelta
+    newrl._parse_index = rl._parse_index
+
+    for rev in rl.revs():
+        node = rl.node(rev)
+        p1, p2 = rl.parents(node)
+
+        if rev == censorrev:
+            newrl.addrawrevision(
+                tombstone,
+                tr,
+                rl.linkrev(censorrev),
+                p1,
+                p2,
+                censornode,
+                constants.REVIDX_ISCENSORED,
+            )
+
+            if newrl.deltaparent(rev) != nullrev:
+                m = _(b'censored revision stored as delta; cannot censor')
+                h = _(
+                    b'censoring of revlogs is not fully implemented;'
+                    b' please report this bug'
+                )
+                raise error.Abort(m, hint=h)
+            continue
+
+        if rl.iscensored(rev):
+            if rl.deltaparent(rev) != nullrev:
+                m = _(
+                    b'cannot censor due to censored '
+                    b'revision having delta stored'
+                )
+                raise error.Abort(m)
+            rawtext = rl._chunk(rev)
+        else:
+            rawtext = rl.rawdata(rev)
+
+        newrl.addrawrevision(
+            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
+        )
+
+    tr.addbackup(rl._indexfile, location=b'store')
+    if not rl._inline:
+        tr.addbackup(rl._datafile, location=b'store')
+
+    rl.opener.rename(newrl._indexfile, rl._indexfile)
+    if not rl._inline:
+        rl.opener.rename(newrl._datafile, rl._datafile)
+
+    rl.clearcaches()
+    rl._loadindex()
+
+
+def v2_censor(rl, tr, censornode, tombstone=b''):
+    """censors a revision in a "version 2" revlog"""
+    # General principle
+    #
+    # We create new revlog files (index/data/sidedata) to copy the content of
+    # the existing data without the censored data.
+    #
+    # We need to recompute new delta for any revision that used the censored
+    # revision as delta base. As the cumulative size of the new delta may be
+    # large, we store them in a temporary file until they are stored in their
+    # final destination.
+    #
+    # All data before the censored data can be blindly copied. The rest needs
+    # to be copied as we go and the associated index entry needs adjustment.
+
+    assert rl._format_version != REVLOGV0, rl._format_version
+    assert rl._format_version != REVLOGV1, rl._format_version
+
+    old_index = rl.index
+    docket = rl._docket
+
+    censor_rev = rl.rev(censornode)
+    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
+
+    censored_entry = rl.index[censor_rev]
+    index_cutoff = rl.index.entry_size * censor_rev
+    data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
+    sidedata_cutoff = rl.sidedata_cut_off(censor_rev)
+
+    # rev → (new_base, data_start, data_end)
+    rewritten_entries = {}
+
+    dc = deltas.deltacomputer(rl)
+    excl = [censor_rev]
+
+    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
+        with rl._segmentfile._open_read() as dfh:
+            for rev in range(censor_rev + 1, len(old_index)):
+                entry = old_index[rev]
+                if censor_rev != entry[ENTRY_DELTA_BASE]:
+                    continue
+                # This is a revision that uses the censored revision as the
+                # base for its delta. We need new deltas
+                if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
+                    # this revision is empty, we can delta against nullrev
+                    rewritten_entries[rev] = (nullrev, 0, 0)
+                else:
+
+                    text = rl.rawdata(rev, _df=dfh)
+                    info = revlogutils.revisioninfo(
+                        node=entry[ENTRY_NODE_ID],
+                        p1=rl.node(entry[ENTRY_PARENT_1]),
+                        p2=rl.node(entry[ENTRY_PARENT_2]),
+                        btext=[text],
+                        textlen=len(text),
+                        cachedelta=None,
+                        flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
+                    )
+                    d = dc.finddeltainfo(
+                        info, dfh, excluded_bases=excl, target_rev=rev
+                    )
+                    default_comp = rl._docket.default_compression_header
+                    comp_mode, d = deltas.delta_compression(default_comp, d)
+                    # using `tell` is a bit lazy, but we are not here for speed
+                    start = tmp_storage.tell()
+                    tmp_storage.write(d.data[1])
+                    end = tmp_storage.tell()
+                    rewritten_entries[rev] = (d.base, start, end, comp_mode)
+
+        old_index_filepath = rl.opener.join(docket.index_filepath())
+        old_data_filepath = rl.opener.join(docket.data_filepath())
+        old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath())
+
+        new_index_filepath = rl.opener.join(docket.new_index_file())
+        new_data_filepath = rl.opener.join(docket.new_data_file())
+        new_sidedata_filepath = rl.opener.join(docket.new_sidedata_file())
+
+        util.copyfile(
+            old_index_filepath, new_index_filepath, nb_bytes=index_cutoff
+        )
+        util.copyfile(
+            old_data_filepath, new_data_filepath, nb_bytes=data_cutoff
+        )
+        util.copyfile(
+            old_sidedata_filepath,
+            new_sidedata_filepath,
+            nb_bytes=sidedata_cutoff,
+        )
+        rl.opener.register_file(docket.index_filepath())
+        rl.opener.register_file(docket.data_filepath())
+        rl.opener.register_file(docket.sidedata_filepath())
+
+        docket.index_end = index_cutoff
+        docket.data_end = data_cutoff
+        docket.sidedata_end = sidedata_cutoff
+
+        # reload the revlog internal information
+        rl.clearcaches()
+        rl._loadindex(docket=docket)
+
+        @contextlib.contextmanager
+        def all_files():
+            # hide opening in a helper function to please check-code, black
+            # and various Python versions at the same time
+            with open(old_data_filepath, 'rb') as old_data_file:
+                with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
+                    with open(new_index_filepath, 'r+b') as new_index_file:
+                        with open(new_data_filepath, 'r+b') as new_data_file:
+                            with open(
+                                new_sidedata_filepath, 'r+b'
+                            ) as new_sidedata_file:
+                                yield (
+                                    old_data_file,
+                                    old_sidedata_file,
+                                    new_index_file,
+                                    new_data_file,
+                                    new_sidedata_file,
+                                )
+
+        # we don't need to open the old index file since its content already
+        # exists in a usable form in `old_index`.
+        with all_files() as (
+            old_data_file,
+            old_sidedata_file,
+            new_index_file,
+            new_data_file,
+            new_sidedata_file,
+        ):
+            new_index_file.seek(0, os.SEEK_END)
+            assert new_index_file.tell() == index_cutoff
+            new_data_file.seek(0, os.SEEK_END)
+            assert new_data_file.tell() == data_cutoff
+            new_sidedata_file.seek(0, os.SEEK_END)
+            assert new_sidedata_file.tell() == sidedata_cutoff
+
+            ### writing the censored revision
+            entry = old_index[censor_rev]
+
+            # XXX consider trying the default compression too
+            new_data_size = len(tombstone)
+            new_data_offset = new_data_file.tell()
+            new_data_file.write(tombstone)
+
+            # we are not adding any sidedata as they might leak info about the censored version
+
+            new_entry = revlogutils.entry(
+                flags=constants.REVIDX_ISCENSORED,
+                data_offset=new_data_offset,
+                data_compressed_length=new_data_size,
+                data_uncompressed_length=new_data_size,
+                data_delta_base=censor_rev,
+                link_rev=entry[ENTRY_LINK_REV],
+                parent_rev_1=entry[ENTRY_PARENT_1],
+                parent_rev_2=entry[ENTRY_PARENT_2],
+                node_id=entry[ENTRY_NODE_ID],
+                sidedata_offset=0,
+                sidedata_compressed_length=0,
+                data_compression_mode=COMP_MODE_PLAIN,
+                sidedata_compression_mode=COMP_MODE_PLAIN,
+            )
+            rl.index.append(new_entry)
+            entry_bin = rl.index.entry_binary(censor_rev)
+            new_index_file.write(entry_bin)
+            docket.index_end = new_index_file.tell()
+            docket.data_end = new_data_file.tell()
+
+            #### Writing all subsequent revisions
+            for rev in range(censor_rev + 1, len(old_index)):
+                entry = old_index[rev]
+                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
+                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
+
+                if rev not in rewritten_entries:
+                    old_data_file.seek(old_data_offset)
+                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
+                    new_data = old_data_file.read(new_data_size)
+                    data_delta_base = entry[ENTRY_DELTA_BASE]
+                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
+                else:
+                    (
+                        data_delta_base,
+                        start,
+                        end,
+                        d_comp_mode,
+                    ) = rewritten_entries[rev]
+                    new_data_size = end - start
+                    tmp_storage.seek(start)
+                    new_data = tmp_storage.read(new_data_size)
+
+                # It might be faster to group continuous read/write operation,
+                # however, this is censor, an operation that is not focussed
+                # around stellar performance. So I have not written this
+                # optimisation yet.
+                new_data_offset = new_data_file.tell()
+                new_data_file.write(new_data)
+
+                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
+                new_sidedata_offset = new_sidedata_file.tell()
+                if 0 < sidedata_size:
+                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
+                    old_sidedata_file.seek(old_sidedata_offset)
+                    new_sidedata = old_sidedata_file.read(sidedata_size)
+                    new_sidedata_file.write(new_sidedata)
+
+                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
+                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
+                assert data_delta_base <= rev, (data_delta_base, rev)
+
+                new_entry = revlogutils.entry(
+                    flags=flags,
+                    data_offset=new_data_offset,
+                    data_compressed_length=new_data_size,
+                    data_uncompressed_length=data_uncompressed_length,
+                    data_delta_base=data_delta_base,
+                    link_rev=entry[ENTRY_LINK_REV],
+                    parent_rev_1=entry[ENTRY_PARENT_1],
+                    parent_rev_2=entry[ENTRY_PARENT_2],
+                    node_id=entry[ENTRY_NODE_ID],
+                    sidedata_offset=new_sidedata_offset,
+                    sidedata_compressed_length=sidedata_size,
+                    data_compression_mode=d_comp_mode,
+                    sidedata_compression_mode=sd_com_mode,
+                )
+                rl.index.append(new_entry)
+                entry_bin = rl.index.entry_binary(rev)
+                new_index_file.write(entry_bin)
+
+                docket.index_end = new_index_file.tell()
+                docket.data_end = new_data_file.tell()
+                docket.sidedata_end = new_sidedata_file.tell()
+
+    docket.write(transaction=None, stripping=True)