changeset 47468:9b70aa7bcbab

censor: extract the part about writing the other revision in a function The v2_censor function is huge; now that its content has settled a bit, it is a good time to split its individual parts into dedicated functions. We continue with a small function that adds a non-censored revision back to the revlog. Differential Revision: https://phab.mercurial-scm.org/D10899
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 22 Jun 2021 21:49:31 +0200
parents 3ab267f0cbe4
children 60c48458ee6c
files mercurial/revlogutils/rewrite.py
diffstat 1 files changed, 84 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlogutils/rewrite.py	Tue Jun 22 21:41:33 2021 +0200
+++ b/mercurial/revlogutils/rewrite.py	Tue Jun 22 21:49:31 2021 +0200
@@ -270,72 +270,97 @@
                 tombstone,
             )
 
-            #### Writing all subsequent revisions
+            # Writing all subsequent revisions
             for rev in range(censor_rev + 1, len(old_index)):
-                entry = old_index[rev]
-                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
-                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
+                _rewrite_simple(
+                    rl,
+                    old_index,
+                    open_files,
+                    rev,
+                    rewritten_entries,
+                    tmp_storage,
+                )
+    docket.write(transaction=None, stripping=True)
+
 
-                if rev not in rewritten_entries:
-                    old_data_file.seek(old_data_offset)
-                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
-                    new_data = old_data_file.read(new_data_size)
-                    data_delta_base = entry[ENTRY_DELTA_BASE]
-                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
-                else:
-                    (
-                        data_delta_base,
-                        start,
-                        end,
-                        d_comp_mode,
-                    ) = rewritten_entries[rev]
-                    new_data_size = end - start
-                    tmp_storage.seek(start)
-                    new_data = tmp_storage.read(new_data_size)
-
-                # It might be faster to group continuous read/write operation,
-                # however, this is censor, an operation that is not focussed
-                # around stellar performance. So I have not written this
-                # optimisation yet.
-                new_data_offset = new_data_file.tell()
-                new_data_file.write(new_data)
+def _rewrite_simple(
+    revlog,
+    old_index,
+    all_files,
+    rev,
+    rewritten_entries,
+    tmp_storage,
+):
+    """append a normal revision to the index after the rewritten one(s)"""
+    (
+        old_data_file,
+        old_sidedata_file,
+        new_index_file,
+        new_data_file,
+        new_sidedata_file,
+    ) = all_files
+    entry = old_index[rev]
+    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
+    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
 
-                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
-                new_sidedata_offset = new_sidedata_file.tell()
-                if 0 < sidedata_size:
-                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
-                    old_sidedata_file.seek(old_sidedata_offset)
-                    new_sidedata = old_sidedata_file.read(sidedata_size)
-                    new_sidedata_file.write(new_sidedata)
+    if rev not in rewritten_entries:
+        old_data_file.seek(old_data_offset)
+        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
+        new_data = old_data_file.read(new_data_size)
+        data_delta_base = entry[ENTRY_DELTA_BASE]
+        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
+    else:
+        (
+            data_delta_base,
+            start,
+            end,
+            d_comp_mode,
+        ) = rewritten_entries[rev]
+        new_data_size = end - start
+        tmp_storage.seek(start)
+        new_data = tmp_storage.read(new_data_size)
 
-                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
-                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
-                assert data_delta_base <= rev, (data_delta_base, rev)
+    # It might be faster to group continuous read/write operation,
+    # however, this is censor, an operation that is not focussed
+    # around stellar performance. So I have not written this
+    # optimisation yet.
+    new_data_offset = new_data_file.tell()
+    new_data_file.write(new_data)
 
-                new_entry = revlogutils.entry(
-                    flags=flags,
-                    data_offset=new_data_offset,
-                    data_compressed_length=new_data_size,
-                    data_uncompressed_length=data_uncompressed_length,
-                    data_delta_base=data_delta_base,
-                    link_rev=entry[ENTRY_LINK_REV],
-                    parent_rev_1=entry[ENTRY_PARENT_1],
-                    parent_rev_2=entry[ENTRY_PARENT_2],
-                    node_id=entry[ENTRY_NODE_ID],
-                    sidedata_offset=new_sidedata_offset,
-                    sidedata_compressed_length=sidedata_size,
-                    data_compression_mode=d_comp_mode,
-                    sidedata_compression_mode=sd_com_mode,
-                )
-                rl.index.append(new_entry)
-                entry_bin = rl.index.entry_binary(rev)
-                new_index_file.write(entry_bin)
+    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
+    new_sidedata_offset = new_sidedata_file.tell()
+    if 0 < sidedata_size:
+        old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
+        old_sidedata_file.seek(old_sidedata_offset)
+        new_sidedata = old_sidedata_file.read(sidedata_size)
+        new_sidedata_file.write(new_sidedata)
+
+    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
+    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
+    assert data_delta_base <= rev, (data_delta_base, rev)
 
-                docket.index_end = new_index_file.tell()
-                docket.data_end = new_data_file.tell()
-                docket.sidedata_end = new_sidedata_file.tell()
+    new_entry = revlogutils.entry(
+        flags=flags,
+        data_offset=new_data_offset,
+        data_compressed_length=new_data_size,
+        data_uncompressed_length=data_uncompressed_length,
+        data_delta_base=data_delta_base,
+        link_rev=entry[ENTRY_LINK_REV],
+        parent_rev_1=entry[ENTRY_PARENT_1],
+        parent_rev_2=entry[ENTRY_PARENT_2],
+        node_id=entry[ENTRY_NODE_ID],
+        sidedata_offset=new_sidedata_offset,
+        sidedata_compressed_length=sidedata_size,
+        data_compression_mode=d_comp_mode,
+        sidedata_compression_mode=sd_com_mode,
+    )
+    revlog.index.append(new_entry)
+    entry_bin = revlog.index.entry_binary(rev)
+    new_index_file.write(entry_bin)
 
-    docket.write(transaction=None, stripping=True)
+    revlog._docket.index_end = new_index_file.tell()
+    revlog._docket.data_end = new_data_file.tell()
+    revlog._docket.sidedata_end = new_sidedata_file.tell()
 
 
 def _rewrite_censor(