comparison mercurial/revlogutils/rewrite.py @ 47469:60c48458ee6c

censor: extract the part about recomputing delta in a function. The v2_censor function is huge; now that its content has settled a bit, it is a good time to split its individual parts into dedicated functions. It is now the turn of the logic that recomputes new deltas to replace the ones based on the revisions that are going away. Differential Revision: https://phab.mercurial-scm.org/D10900
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 22 Jun 2021 22:10:22 +0200
parents 9b70aa7bcbab
children d6afe1478a2a
comparison
equal deleted inserted replaced
47468:9b70aa7bcbab 47469:60c48458ee6c
152 censored_entry = rl.index[censor_rev] 152 censored_entry = rl.index[censor_rev]
153 index_cutoff = rl.index.entry_size * censor_rev 153 index_cutoff = rl.index.entry_size * censor_rev
154 data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16 154 data_cutoff = censored_entry[ENTRY_DATA_OFFSET] >> 16
155 sidedata_cutoff = rl.sidedata_cut_off(censor_rev) 155 sidedata_cutoff = rl.sidedata_cut_off(censor_rev)
156 156
157 # rev → (new_base, data_start, data_end)
158 rewritten_entries = {}
159
160 dc = deltas.deltacomputer(rl)
161 excl = [censor_rev]
162
163 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage: 157 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
164 with rl._segmentfile._open_read() as dfh: 158 # rev → (new_base, data_start, data_end, compression_mode)
165 for rev in range(censor_rev + 1, len(old_index)): 159 rewritten_entries = _precompute_rewritten_delta(
166 entry = old_index[rev] 160 rl,
167 if censor_rev != entry[ENTRY_DELTA_BASE]: 161 old_index,
168 continue 162 {censor_rev},
169 # This is a revision that use the censored revision as the base 163 tmp_storage,
170 # for its delta. We need a need new deltas 164 )
171 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
172 # this revision is empty, we can delta against nullrev
173 rewritten_entries[rev] = (nullrev, 0, 0)
174 else:
175
176 text = rl.rawdata(rev, _df=dfh)
177 info = revlogutils.revisioninfo(
178 node=entry[ENTRY_NODE_ID],
179 p1=rl.node(entry[ENTRY_PARENT_1]),
180 p2=rl.node(entry[ENTRY_PARENT_2]),
181 btext=[text],
182 textlen=len(text),
183 cachedelta=None,
184 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
185 )
186 d = dc.finddeltainfo(
187 info, dfh, excluded_bases=excl, target_rev=rev
188 )
189 default_comp = rl._docket.default_compression_header
190 comp_mode, d = deltas.delta_compression(default_comp, d)
191 # using `tell` is a bit lazy, but we are not here for speed
192 start = tmp_storage.tell()
193 tmp_storage.write(d.data[1])
194 end = tmp_storage.tell()
195 rewritten_entries[rev] = (d.base, start, end, comp_mode)
196 165
197 old_index_filepath = rl.opener.join(docket.index_filepath()) 166 old_index_filepath = rl.opener.join(docket.index_filepath())
198 old_data_filepath = rl.opener.join(docket.data_filepath()) 167 old_data_filepath = rl.opener.join(docket.data_filepath())
199 old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath()) 168 old_sidedata_filepath = rl.opener.join(docket.sidedata_filepath())
200 169
279 rev, 248 rev,
280 rewritten_entries, 249 rewritten_entries,
281 tmp_storage, 250 tmp_storage,
282 ) 251 )
283 docket.write(transaction=None, stripping=True) 252 docket.write(transaction=None, stripping=True)
253
254
255 def _precompute_rewritten_delta(
256 revlog,
257 old_index,
258 excluded_revs,
259 tmp_storage,
260 ):
261 """Compute new deltas for revisions whose delta is based on a revision
262 that will not survive as is.
263
264 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
265 """
266 dc = deltas.deltacomputer(revlog)
267 rewritten_entries = {}
268 first_excl_rev = min(excluded_revs)
269 with revlog._segmentfile._open_read() as dfh:
270 for rev in range(first_excl_rev, len(old_index)):
271 if rev in excluded_revs:
272 # this revision is one of the excluded ones: it will not survive as
273 # is, so there is no delta to recompute for it here.
274 continue
275 entry = old_index[rev]
276 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
277 continue
278 # This is a revision that uses the censored revision as the base
279 # for its delta. We need a new delta.
280 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
281 # this revision is empty, we can delta against nullrev
282 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
283 else:
284
285 text = revlog.rawdata(rev, _df=dfh)
286 info = revlogutils.revisioninfo(
287 node=entry[ENTRY_NODE_ID],
288 p1=revlog.node(entry[ENTRY_PARENT_1]),
289 p2=revlog.node(entry[ENTRY_PARENT_2]),
290 btext=[text],
291 textlen=len(text),
292 cachedelta=None,
293 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
294 )
295 d = dc.finddeltainfo(
296 info, dfh, excluded_bases=excluded_revs, target_rev=rev
297 )
298 default_comp = revlog._docket.default_compression_header
299 comp_mode, d = deltas.delta_compression(default_comp, d)
300 # using `tell` is a bit lazy, but we are not here for speed
301 start = tmp_storage.tell()
302 tmp_storage.write(d.data[1])
303 end = tmp_storage.tell()
304 rewritten_entries[rev] = (d.base, start, end, comp_mode)
305 return rewritten_entries
284 306
285 307
286 def _rewrite_simple( 308 def _rewrite_simple(
287 revlog, 309 revlog,
288 old_index, 310 old_index,