Mercurial > hg
comparison mercurial/metadata.py @ 45634:9a6b409b8ebc
changing-files: rework the way we store changed files in side-data
We need to store new data so this is a good opportunity to rework this fully.
1) We directly store the list of affected files in the side data:
* This avoids having to fetch and parse the `files` list in the revision in
addition to the sidedata, making the data more self-sufficient.
* This works around situations where the `files` field contains wrong
information, and opens the way to other bug fixes (e.g. issue6219).
* The format (fixed initial index, sorted files) allows for fast lookup of
a filename within the structure.
* This unifies the storage of affected files and of copy sources and destinations,
limiting the number of filenames stored redundantly.
* This prepares for the fact that we should drop the `files` field as soon as we make any
change affecting the revision schema.
* This relies on compression to avoid a significant increase in the size of the changelog.d.
More testing on this will be done before we freeze the final format.
2) We can store additional data:
* The new "merged" field,
* A future "salvaged" set recording files that might have been deleted but
were still present in the final result.
Differential Revision: https://phab.mercurial-scm.org/D9090
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 15 Sep 2020 10:55:17 +0200 |
parents | d31483377673 |
children | 9003e6524f78 |
comparison
equal
deleted
inserted
replaced
45632:7d0e54056586 | 45634:9a6b409b8ebc |
---|---|
6 # This software may be used and distributed according to the terms of the | 6 # This software may be used and distributed according to the terms of the |
7 # GNU General Public License version 2 or any later version. | 7 # GNU General Public License version 2 or any later version. |
8 from __future__ import absolute_import, print_function | 8 from __future__ import absolute_import, print_function |
9 | 9 |
10 import multiprocessing | 10 import multiprocessing |
11 import struct | |
11 | 12 |
12 from . import ( | 13 from . import ( |
13 error, | 14 error, |
14 node, | 15 node, |
15 pycompat, | 16 pycompat, |
371 # Perhaps someone had chosen the same key name (e.g. "added") and | 372 # Perhaps someone had chosen the same key name (e.g. "added") and |
372 # used different syntax for the value. | 373 # used different syntax for the value. |
373 return None | 374 return None |
374 | 375 |
375 | 376 |
377 # see mercurial/helptext/internals/revlogs.txt for details about the format | |
378 | |
379 ACTION_MASK = int("111" "00", 2) | |
380 # note: an untouched file used as a copy source will have `000` for this mask. | |
381 ADDED_FLAG = int("001" "00", 2) | |
382 MERGED_FLAG = int("010" "00", 2) | |
383 REMOVED_FLAG = int("011" "00", 2) | |
384 # `100` is reserved for future use | |
385 TOUCHED_FLAG = int("101" "00", 2) | |
386 | |
387 COPIED_MASK = int("11", 2) | |
388 COPIED_FROM_P1_FLAG = int("10", 2) | |
389 COPIED_FROM_P2_FLAG = int("11", 2) | |
390 | |
391 # structure is <flag><filename-end><copy-source> | |
392 INDEX_HEADER = struct.Struct(">L") | |
393 INDEX_ENTRY = struct.Struct(">bLL") | |
394 | |
395 | |
376 def encode_files_sidedata(files): | 396 def encode_files_sidedata(files): |
377 sortedfiles = sorted(files.touched) | 397 all_files = set(files.touched) |
378 sidedata = {} | 398 all_files.update(files.copied_from_p1.values()) |
379 p1copies = files.copied_from_p1 | 399 all_files.update(files.copied_from_p2.values()) |
380 if p1copies: | 400 all_files = sorted(all_files) |
381 p1copies = encodecopies(sortedfiles, p1copies) | 401 file_idx = {f: i for (i, f) in enumerate(all_files)} |
382 sidedata[sidedatamod.SD_P1COPIES] = p1copies | 402 file_idx[None] = 0 |
383 p2copies = files.copied_from_p2 | 403 |
384 if p2copies: | 404 chunks = [INDEX_HEADER.pack(len(all_files))] |
385 p2copies = encodecopies(sortedfiles, p2copies) | 405 |
386 sidedata[sidedatamod.SD_P2COPIES] = p2copies | 406 filename_length = 0 |
387 filesadded = files.added | 407 for f in all_files: |
388 if filesadded: | 408 filename_size = len(f) |
389 filesadded = encodefileindices(sortedfiles, filesadded) | 409 filename_length += filename_size |
390 sidedata[sidedatamod.SD_FILESADDED] = filesadded | 410 flag = 0 |
391 filesremoved = files.removed | 411 if f in files.added: |
392 if filesremoved: | 412 flag |= ADDED_FLAG |
393 filesremoved = encodefileindices(sortedfiles, filesremoved) | 413 elif f in files.merged: |
394 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved | 414 flag |= MERGED_FLAG |
395 if not sidedata: | 415 elif f in files.removed: |
396 sidedata = None | 416 flag |= REMOVED_FLAG |
397 return sidedata | 417 elif f in files.touched: |
418 flag |= TOUCHED_FLAG | |
419 | |
420 copy = None | |
421 if f in files.copied_from_p1: | |
422 flag |= COPIED_FROM_P1_FLAG | |
423 copy = files.copied_from_p1.get(f) | |
424 elif f in files.copied_from_p2: | |
425 copy = files.copied_from_p2.get(f) | |
426 flag |= COPIED_FROM_P2_FLAG | |
427 copy_idx = file_idx[copy] | |
428 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx)) | |
429 chunks.extend(all_files) | |
430 return {sidedatamod.SD_FILES: b''.join(chunks)} | |
398 | 431 |
399 | 432 |
400 def decode_files_sidedata(changelogrevision, sidedata): | 433 def decode_files_sidedata(changelogrevision, sidedata): |
401 """Return a ChangingFiles instance from a changelogrevision using sidata | 434 md = ChangingFiles() |
402 """ | 435 raw = sidedata.get(sidedatamod.SD_FILES) |
403 touched = changelogrevision.files | 436 |
404 | 437 if raw is None: |
405 rawindices = sidedata.get(sidedatamod.SD_FILESADDED) | 438 return md |
406 added = decodefileindices(touched, rawindices) | 439 |
407 | 440 copies = [] |
408 rawindices = sidedata.get(sidedatamod.SD_FILESREMOVED) | 441 all_files = [] |
409 removed = decodefileindices(touched, rawindices) | 442 |
410 | 443 assert len(raw) >= INDEX_HEADER.size |
411 rawcopies = sidedata.get(sidedatamod.SD_P1COPIES) | 444 total_files = INDEX_HEADER.unpack_from(raw, 0)[0] |
412 p1_copies = decodecopies(touched, rawcopies) | 445 |
413 | 446 offset = INDEX_HEADER.size |
414 rawcopies = sidedata.get(sidedatamod.SD_P2COPIES) | 447 file_offset_base = offset + (INDEX_ENTRY.size * total_files) |
415 p2_copies = decodecopies(touched, rawcopies) | 448 file_offset_last = file_offset_base |
416 | 449 |
417 return ChangingFiles( | 450 assert len(raw) >= file_offset_base |
418 touched=touched, | 451 |
419 added=added, | 452 for idx in range(total_files): |
420 removed=removed, | 453 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset) |
421 p1_copies=p1_copies, | 454 file_end += file_offset_base |
422 p2_copies=p2_copies, | 455 filename = raw[file_offset_last:file_end] |
423 ) | 456 filesize = file_end - file_offset_last |
457 assert len(filename) == filesize | |
458 offset += INDEX_ENTRY.size | |
459 file_offset_last = file_end | |
460 all_files.append(filename) | |
461 if flag & ACTION_MASK == ADDED_FLAG: | |
462 md.mark_added(filename) | |
463 elif flag & ACTION_MASK == MERGED_FLAG: | |
464 md.mark_merged(filename) | |
465 elif flag & ACTION_MASK == REMOVED_FLAG: | |
466 md.mark_removed(filename) | |
467 elif flag & ACTION_MASK == TOUCHED_FLAG: | |
468 md.mark_touched(filename) | |
469 | |
470 copied = None | |
471 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG: | |
472 copied = md.mark_copied_from_p1 | |
473 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG: | |
474 copied = md.mark_copied_from_p2 | |
475 | |
476 if copied is not None: | |
477 copies.append((copied, filename, copy_idx)) | |
478 | |
479 for copied, filename, copy_idx in copies: | |
480 copied(all_files[copy_idx], filename) | |
481 | |
482 return md | |
424 | 483 |
425 | 484 |
426 def _getsidedata(srcrepo, rev): | 485 def _getsidedata(srcrepo, rev): |
427 ctx = srcrepo[rev] | 486 ctx = srcrepo[rev] |
428 filescopies = computechangesetcopies(ctx) | 487 filescopies = computechangesetcopies(ctx) |
429 filesadded = computechangesetfilesadded(ctx) | 488 filesadded = computechangesetfilesadded(ctx) |
430 filesremoved = computechangesetfilesremoved(ctx) | 489 filesremoved = computechangesetfilesremoved(ctx) |
431 sidedata = {} | 490 filesmerged = computechangesetfilesmerged(ctx) |
432 if any([filescopies, filesadded, filesremoved]): | 491 files = ChangingFiles() |
433 sortedfiles = sorted(ctx.files()) | 492 files.update_touched(ctx.files()) |
434 p1copies, p2copies = filescopies | 493 files.update_added(filesadded) |
435 p1copies = encodecopies(sortedfiles, p1copies) | 494 files.update_removed(filesremoved) |
436 p2copies = encodecopies(sortedfiles, p2copies) | 495 files.update_merged(filesmerged) |
437 filesadded = encodefileindices(sortedfiles, filesadded) | 496 files.update_copies_from_p1(filescopies[0]) |
438 filesremoved = encodefileindices(sortedfiles, filesremoved) | 497 files.update_copies_from_p2(filescopies[1]) |
439 if p1copies: | 498 return encode_files_sidedata(files) |
440 sidedata[sidedatamod.SD_P1COPIES] = p1copies | |
441 if p2copies: | |
442 sidedata[sidedatamod.SD_P2COPIES] = p2copies | |
443 if filesadded: | |
444 sidedata[sidedatamod.SD_FILESADDED] = filesadded | |
445 if filesremoved: | |
446 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved | |
447 return sidedata | |
448 | 499 |
449 | 500 |
450 def getsidedataadder(srcrepo, destrepo): | 501 def getsidedataadder(srcrepo, destrepo): |
451 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade') | 502 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade') |
452 if pycompat.iswindows or not use_w: | 503 if pycompat.iswindows or not use_w: |