comparison mercurial/metadata.py @ 45634:9a6b409b8ebc

changing-files: rework the way we store changed files in side-data We need to store new data, so this is a good opportunity to rework this fully. 1) We directly store the list of affected files in the side data: * This avoids having to fetch and parse the `files` list in the revision in addition to the sidedata, making the data more self-sufficient. * This works around situations where the `files` field contains wrong information, and opens the way to other bug fixes (e.g. issue6219). * The format (fixed initial index, sorted files) allows for fast lookup of a filename within the structure. * This unifies the storage of affected files and of copy sources and destinations, limiting the number of filenames stored redundantly. * This prepares for the fact that we should drop the `files` field as soon as we make any change affecting the revision schema. * This relies on compression to avoid a significant increase in the size of changelog.d. More testing on this will be done before we freeze the final format. 2) We can store additional data: * The new "merged" field, * A future "salvaged" set recording files that might have been deleted but were still present in the final result. Differential Revision: https://phab.mercurial-scm.org/D9090
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 15 Sep 2020 10:55:17 +0200
parents d31483377673
children 9003e6524f78
comparison
equal deleted inserted replaced
45632:7d0e54056586 45634:9a6b409b8ebc
6 # This software may be used and distributed according to the terms of the 6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version. 7 # GNU General Public License version 2 or any later version.
8 from __future__ import absolute_import, print_function 8 from __future__ import absolute_import, print_function
9 9
10 import multiprocessing 10 import multiprocessing
11 import struct
11 12
12 from . import ( 13 from . import (
13 error, 14 error,
14 node, 15 node,
15 pycompat, 16 pycompat,
371 # Perhaps someone had chosen the same key name (e.g. "added") and 372 # Perhaps someone had chosen the same key name (e.g. "added") and
372 # used different syntax for the value. 373 # used different syntax for the value.
373 return None 374 return None
374 375
375 376
377 # see mercurial/helptext/internals/revlogs.txt for details about the format
378
379 ACTION_MASK = int("111" "00", 2)
380 # note: an untouched file used as a copy source will have `000` for this mask.
381 ADDED_FLAG = int("001" "00", 2)
382 MERGED_FLAG = int("010" "00", 2)
383 REMOVED_FLAG = int("011" "00", 2)
384 # `100` is reserved for future use
385 TOUCHED_FLAG = int("101" "00", 2)
386
387 COPIED_MASK = int("11", 2)
388 COPIED_FROM_P1_FLAG = int("10", 2)
389 COPIED_FROM_P2_FLAG = int("11", 2)
390
391 # structure is <flag><filename-end><copy-source>
392 INDEX_HEADER = struct.Struct(">L")
393 INDEX_ENTRY = struct.Struct(">bLL")
394
395
376 def encode_files_sidedata(files): 396 def encode_files_sidedata(files):
377 sortedfiles = sorted(files.touched) 397 all_files = set(files.touched)
378 sidedata = {} 398 all_files.update(files.copied_from_p1.values())
379 p1copies = files.copied_from_p1 399 all_files.update(files.copied_from_p2.values())
380 if p1copies: 400 all_files = sorted(all_files)
381 p1copies = encodecopies(sortedfiles, p1copies) 401 file_idx = {f: i for (i, f) in enumerate(all_files)}
382 sidedata[sidedatamod.SD_P1COPIES] = p1copies 402 file_idx[None] = 0
383 p2copies = files.copied_from_p2 403
384 if p2copies: 404 chunks = [INDEX_HEADER.pack(len(all_files))]
385 p2copies = encodecopies(sortedfiles, p2copies) 405
386 sidedata[sidedatamod.SD_P2COPIES] = p2copies 406 filename_length = 0
387 filesadded = files.added 407 for f in all_files:
388 if filesadded: 408 filename_size = len(f)
389 filesadded = encodefileindices(sortedfiles, filesadded) 409 filename_length += filename_size
390 sidedata[sidedatamod.SD_FILESADDED] = filesadded 410 flag = 0
391 filesremoved = files.removed 411 if f in files.added:
392 if filesremoved: 412 flag |= ADDED_FLAG
393 filesremoved = encodefileindices(sortedfiles, filesremoved) 413 elif f in files.merged:
394 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved 414 flag |= MERGED_FLAG
395 if not sidedata: 415 elif f in files.removed:
396 sidedata = None 416 flag |= REMOVED_FLAG
397 return sidedata 417 elif f in files.touched:
418 flag |= TOUCHED_FLAG
419
420 copy = None
421 if f in files.copied_from_p1:
422 flag |= COPIED_FROM_P1_FLAG
423 copy = files.copied_from_p1.get(f)
424 elif f in files.copied_from_p2:
425 copy = files.copied_from_p2.get(f)
426 flag |= COPIED_FROM_P2_FLAG
427 copy_idx = file_idx[copy]
428 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
429 chunks.extend(all_files)
430 return {sidedatamod.SD_FILES: b''.join(chunks)}
398 431
399 432
400 def decode_files_sidedata(changelogrevision, sidedata): 433 def decode_files_sidedata(changelogrevision, sidedata):
401 """Return a ChangingFiles instance from a changelogrevision using sidedata 434 md = ChangingFiles()
402 """ 435 raw = sidedata.get(sidedatamod.SD_FILES)
403 touched = changelogrevision.files 436
404 437 if raw is None:
405 rawindices = sidedata.get(sidedatamod.SD_FILESADDED) 438 return md
406 added = decodefileindices(touched, rawindices) 439
407 440 copies = []
408 rawindices = sidedata.get(sidedatamod.SD_FILESREMOVED) 441 all_files = []
409 removed = decodefileindices(touched, rawindices) 442
410 443 assert len(raw) >= INDEX_HEADER.size
411 rawcopies = sidedata.get(sidedatamod.SD_P1COPIES) 444 total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
412 p1_copies = decodecopies(touched, rawcopies) 445
413 446 offset = INDEX_HEADER.size
414 rawcopies = sidedata.get(sidedatamod.SD_P2COPIES) 447 file_offset_base = offset + (INDEX_ENTRY.size * total_files)
415 p2_copies = decodecopies(touched, rawcopies) 448 file_offset_last = file_offset_base
416 449
417 return ChangingFiles( 450 assert len(raw) >= file_offset_base
418 touched=touched, 451
419 added=added, 452 for idx in range(total_files):
420 removed=removed, 453 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
421 p1_copies=p1_copies, 454 file_end += file_offset_base
422 p2_copies=p2_copies, 455 filename = raw[file_offset_last:file_end]
423 ) 456 filesize = file_end - file_offset_last
457 assert len(filename) == filesize
458 offset += INDEX_ENTRY.size
459 file_offset_last = file_end
460 all_files.append(filename)
461 if flag & ACTION_MASK == ADDED_FLAG:
462 md.mark_added(filename)
463 elif flag & ACTION_MASK == MERGED_FLAG:
464 md.mark_merged(filename)
465 elif flag & ACTION_MASK == REMOVED_FLAG:
466 md.mark_removed(filename)
467 elif flag & ACTION_MASK == TOUCHED_FLAG:
468 md.mark_touched(filename)
469
470 copied = None
471 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
472 copied = md.mark_copied_from_p1
473 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
474 copied = md.mark_copied_from_p2
475
476 if copied is not None:
477 copies.append((copied, filename, copy_idx))
478
479 for copied, filename, copy_idx in copies:
480 copied(all_files[copy_idx], filename)
481
482 return md
424 483
425 484
426 def _getsidedata(srcrepo, rev): 485 def _getsidedata(srcrepo, rev):
427 ctx = srcrepo[rev] 486 ctx = srcrepo[rev]
428 filescopies = computechangesetcopies(ctx) 487 filescopies = computechangesetcopies(ctx)
429 filesadded = computechangesetfilesadded(ctx) 488 filesadded = computechangesetfilesadded(ctx)
430 filesremoved = computechangesetfilesremoved(ctx) 489 filesremoved = computechangesetfilesremoved(ctx)
431 sidedata = {} 490 filesmerged = computechangesetfilesmerged(ctx)
432 if any([filescopies, filesadded, filesremoved]): 491 files = ChangingFiles()
433 sortedfiles = sorted(ctx.files()) 492 files.update_touched(ctx.files())
434 p1copies, p2copies = filescopies 493 files.update_added(filesadded)
435 p1copies = encodecopies(sortedfiles, p1copies) 494 files.update_removed(filesremoved)
436 p2copies = encodecopies(sortedfiles, p2copies) 495 files.update_merged(filesmerged)
437 filesadded = encodefileindices(sortedfiles, filesadded) 496 files.update_copies_from_p1(filescopies[0])
438 filesremoved = encodefileindices(sortedfiles, filesremoved) 497 files.update_copies_from_p2(filescopies[1])
439 if p1copies: 498 return encode_files_sidedata(files)
440 sidedata[sidedatamod.SD_P1COPIES] = p1copies
441 if p2copies:
442 sidedata[sidedatamod.SD_P2COPIES] = p2copies
443 if filesadded:
444 sidedata[sidedatamod.SD_FILESADDED] = filesadded
445 if filesremoved:
446 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
447 return sidedata
448 499
449 500
450 def getsidedataadder(srcrepo, destrepo): 501 def getsidedataadder(srcrepo, destrepo):
451 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade') 502 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
452 if pycompat.iswindows or not use_w: 503 if pycompat.iswindows or not use_w: