1 # censor code related to censoring revision |
|
2 # coding: utf8 |
|
3 # |
|
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net> |
|
5 # Copyright 2015 Google, Inc <martinvonz@google.com> |
|
6 # |
|
7 # This software may be used and distributed according to the terms of the |
|
8 # GNU General Public License version 2 or any later version. |
|
9 |
|
10 import contextlib |
|
11 import os |
|
12 |
|
13 from ..node import ( |
|
14 nullrev, |
|
15 ) |
|
16 from .constants import ( |
|
17 COMP_MODE_PLAIN, |
|
18 ENTRY_DATA_COMPRESSED_LENGTH, |
|
19 ENTRY_DATA_COMPRESSION_MODE, |
|
20 ENTRY_DATA_OFFSET, |
|
21 ENTRY_DATA_UNCOMPRESSED_LENGTH, |
|
22 ENTRY_DELTA_BASE, |
|
23 ENTRY_LINK_REV, |
|
24 ENTRY_NODE_ID, |
|
25 ENTRY_PARENT_1, |
|
26 ENTRY_PARENT_2, |
|
27 ENTRY_SIDEDATA_COMPRESSED_LENGTH, |
|
28 ENTRY_SIDEDATA_COMPRESSION_MODE, |
|
29 ENTRY_SIDEDATA_OFFSET, |
|
30 REVLOGV0, |
|
31 REVLOGV1, |
|
32 ) |
|
33 from ..i18n import _ |
|
34 |
|
35 from .. import ( |
|
36 error, |
|
37 pycompat, |
|
38 revlogutils, |
|
39 util, |
|
40 ) |
|
41 from ..utils import ( |
|
42 storageutil, |
|
43 ) |
|
44 from . import ( |
|
45 constants, |
|
46 deltas, |
|
47 ) |
|
48 |
|
49 |
|
def v1_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 1" revlog"""
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # imported locally to avoid an import cycle
    from .. import revlog

    target_rev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    replacement = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # mirror the format/parsing configuration of the source revlog
    replacement._format_version = rl._format_version
    replacement._format_flags = rl._format_flags
    replacement._generaldelta = rl._generaldelta
    replacement._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev != target_rev:
            # a previously censored revision must already be stored as a
            # full snapshot; carry its raw chunk over unchanged
            if rl.iscensored(rev):
                if rl.deltaparent(rev) != nullrev:
                    m = _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                    raise error.Abort(m)
                payload = rl._chunk(rev)
            else:
                payload = rl.rawdata(rev)

            replacement.addrawrevision(
                payload, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
            )
            continue

        # this is the revision being censored: store the tombstone in its
        # place, flagged as censored
        replacement.addrawrevision(
            tombstone,
            tr,
            rl.linkrev(target_rev),
            p1,
            p2,
            censornode,
            constants.REVIDX_ISCENSORED,
        )

        # the tombstone must not be stored as a delta (a delta would retain
        # information from the censored content's neighborhood)
        if replacement.deltaparent(rev) != nullrev:
            m = _(b'censored revision stored as delta; cannot censor')
            h = _(
                b'censoring of revlogs is not fully implemented;'
                b' please report this bug'
            )
            raise error.Abort(m, hint=h)

    # back up the old files, then swap the rewritten ones into place
    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(replacement._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(replacement._datafile, rl._datafile)

    rl.clearcaches()
    rl._loadindex()
|
126 |
|
127 |
|
def v2_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 2" revlog

    Replaces the data of the revision identified by `censornode` with a
    tombstone, recomputes deltas for every later revision that used it as a
    delta base, and writes new index/data/sidedata files registered through
    the docket.
    """
    # General principle
    #
    # We create new revlog files (index/data/sidedata) to copy the content of
    # the existing data without the censored data.
    #
    # We need to recompute new delta for any revision that used the censored
    # revision as delta base. As the cumulative size of the new delta may be
    # large, we store them in a temporary file until they are stored in their
    # final destination.
    #
    # All data before the censored data can be blindly copied. The rest needs
    # to be copied as we go and the associated index entry needs adjustement.

    assert rl._format_version != REVLOGV0, rl._format_version
    assert rl._format_version != REVLOGV1, rl._format_version

    old_index = rl.index
    docket = rl._docket

    censor_rev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # everything strictly before the censored revision is unaffected and can
    # be copied wholesale; compute the copy cutoffs for each of the 3 files
    censored_entry = rl.index[censor_rev]
    index_cutoff = rl.index.entry_size * censor_rev
    data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = rl.sidedata_cut_off(censor_rev)

    # rev → (new_base, data_start, data_end, compression_mode)
    rewritten_entries = {}

    dc = deltas.deltacomputer(rl)
    # the censored revision must never be used as a delta base again
    excl = [censor_rev]

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        with rl._segmentfile._open_read() as dfh:
            for rev in range(censor_rev + 1, len(old_index)):
                entry = old_index[rev]
                if censor_rev != entry[ENTRY_DELTA_BASE]:
                    continue
                # This is a revision that use the censored revision as the base
                # for its delta. We need new deltas
                if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                    # this revision is empty, we can delta against nullrev;
                    # an empty delta needs no compression, so record it as
                    # plain (the tuple must have 4 items to match the
                    # unpacking done in the rewrite loop below)
                    rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
                else:

                    text = rl.rawdata(rev, _df=dfh)
                    info = revlogutils.revisioninfo(
                        node=entry[ENTRY_NODE_ID],
                        p1=rl.node(entry[ENTRY_PARENT_1]),
                        p2=rl.node(entry[ENTRY_PARENT_2]),
                        btext=[text],
                        textlen=len(text),
                        cachedelta=None,
                        flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
                    )
                    d = dc.finddeltainfo(
                        info, dfh, excluded_bases=excl, target_rev=rev
                    )
                    default_comp = rl._docket.default_compression_header
                    comp_mode, d = deltas.delta_compression(default_comp, d)
                    # using `tell` is a bit lazy, but we are not here for speed
                    start = tmp_storage.tell()
                    tmp_storage.write(d.data[1])
                    end = tmp_storage.tell()
                    rewritten_entries[rev] = (d.base, start, end, comp_mode)

        old_index_filepath = rl.opener.join(docket.index_filepath())
        old_data_filepath = rl.opener.join(docket.data_filepath())
        old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath())

        new_index_filepath = rl.opener.join(docket.new_index_file())
        new_data_filepath = rl.opener.join(docket.new_data_file())
        new_sidedata_filepath = rl.opener.join(docket.new_sidedata_file())

        # blindly copy the untouched prefix of each file
        util.copyfile(
            old_index_filepath, new_index_filepath, nb_bytes=index_cutoff
        )
        util.copyfile(
            old_data_filepath, new_data_filepath, nb_bytes=data_cutoff
        )
        util.copyfile(
            old_sidedata_filepath,
            new_sidedata_filepath,
            nb_bytes=sidedata_cutoff,
        )
        rl.opener.register_file(docket.index_filepath())
        rl.opener.register_file(docket.data_filepath())
        rl.opener.register_file(docket.sidedata_filepath())

        docket.index_end = index_cutoff
        docket.data_end = data_cutoff
        docket.sidedata_end = sidedata_cutoff

        # reload the revlog internal information
        rl.clearcaches()
        rl._loadindex(docket=docket)

        @contextlib.contextmanager
        def all_files():
            # hide opening in an helper function to please check-code, black
            # and various python version at the same time
            with open(old_data_filepath, 'rb') as old_data_file:
                with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                    with open(new_index_filepath, 'r+b') as new_index_file:
                        with open(new_data_filepath, 'r+b') as new_data_file:
                            with open(
                                new_sidedata_filepath, 'r+b'
                            ) as new_sidedata_file:
                                yield (
                                    old_data_file,
                                    old_sidedata_file,
                                    new_index_file,
                                    new_data_file,
                                    new_sidedata_file,
                                )

        # we dont need to open the old index file since its content already
        # exist in a usable form in `old_index`.
        with all_files() as (
            old_data_file,
            old_sidedata_file,
            new_index_file,
            new_data_file,
            new_sidedata_file,
        ):
            # sanity-check that the copied prefixes match the cutoffs before
            # appending anything
            new_index_file.seek(0, os.SEEK_END)
            assert new_index_file.tell() == index_cutoff
            new_data_file.seek(0, os.SEEK_END)
            assert new_data_file.tell() == data_cutoff
            new_sidedata_file.seek(0, os.SEEK_END)
            assert new_sidedata_file.tell() == sidedata_cutoff

            ### writing the censored revision
            entry = old_index[censor_rev]

            # XXX consider trying the default compression too
            new_data_size = len(tombstone)
            new_data_offset = new_data_file.tell()
            new_data_file.write(tombstone)

            # we are not adding any sidedata as they might leak info about the censored version

            new_entry = revlogutils.entry(
                flags=constants.REVIDX_ISCENSORED,
                data_offset=new_data_offset,
                data_compressed_length=new_data_size,
                data_uncompressed_length=new_data_size,
                data_delta_base=censor_rev,
                link_rev=entry[ENTRY_LINK_REV],
                parent_rev_1=entry[ENTRY_PARENT_1],
                parent_rev_2=entry[ENTRY_PARENT_2],
                node_id=entry[ENTRY_NODE_ID],
                sidedata_offset=0,
                sidedata_compressed_length=0,
                data_compression_mode=COMP_MODE_PLAIN,
                sidedata_compression_mode=COMP_MODE_PLAIN,
            )
            rl.index.append(new_entry)
            entry_bin = rl.index.entry_binary(censor_rev)
            new_index_file.write(entry_bin)
            docket.index_end = new_index_file.tell()
            docket.data_end = new_data_file.tell()

            #### Writing all subsequent revisions
            for rev in range(censor_rev + 1, len(old_index)):
                entry = old_index[rev]
                flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
                old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16

                if rev not in rewritten_entries:
                    # untouched delta chain: copy the compressed chunk as-is
                    old_data_file.seek(old_data_offset)
                    new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
                    new_data = old_data_file.read(new_data_size)
                    data_delta_base = entry[ENTRY_DELTA_BASE]
                    d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
                else:
                    # revision was re-deltified earlier; fetch the new delta
                    # from the temporary storage
                    (
                        data_delta_base,
                        start,
                        end,
                        d_comp_mode,
                    ) = rewritten_entries[rev]
                    new_data_size = end - start
                    tmp_storage.seek(start)
                    new_data = tmp_storage.read(new_data_size)

                # It might be faster to group continuous read/write operation,
                # however, this is censor, an operation that is not focussed
                # around stellar performance. So I have not written this
                # optimisation yet.
                new_data_offset = new_data_file.tell()
                new_data_file.write(new_data)

                sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
                new_sidedata_offset = new_sidedata_file.tell()
                if 0 < sidedata_size:
                    old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
                    old_sidedata_file.seek(old_sidedata_offset)
                    new_sidedata = old_sidedata_file.read(sidedata_size)
                    new_sidedata_file.write(new_sidedata)

                data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
                sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
                assert data_delta_base <= rev, (data_delta_base, rev)

                new_entry = revlogutils.entry(
                    flags=flags,
                    data_offset=new_data_offset,
                    data_compressed_length=new_data_size,
                    data_uncompressed_length=data_uncompressed_length,
                    data_delta_base=data_delta_base,
                    link_rev=entry[ENTRY_LINK_REV],
                    parent_rev_1=entry[ENTRY_PARENT_1],
                    parent_rev_2=entry[ENTRY_PARENT_2],
                    node_id=entry[ENTRY_NODE_ID],
                    sidedata_offset=new_sidedata_offset,
                    sidedata_compressed_length=sidedata_size,
                    data_compression_mode=d_comp_mode,
                    sidedata_compression_mode=sd_com_mode,
                )
                rl.index.append(new_entry)
                entry_bin = rl.index.entry_binary(rev)
                new_index_file.write(entry_bin)

                docket.index_end = new_index_file.tell()
                docket.data_end = new_data_file.tell()
                docket.sidedata_end = new_sidedata_file.tell()

        # no transaction: the new files were registered with the opener above
        docket.write(transaction=None, stripping=True)
|