comparison mercurial/metadata.py @ 45668:47ad23549b81

changing-files: add clean computation of changed file for merges This is "a tad more complicated" than the previous cases. See inline documentation for details (have fun). Differential Revision: https://phab.mercurial-scm.org/D9128
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 29 Sep 2020 22:47:54 +0200
parents 0303fc1f43f8
children e53778ad64bf
comparison
equal deleted inserted replaced
45667:0303fc1f43f8 45668:47ad23549b81
1 # coding: utf8
1 # metadata.py -- code related to various metadata computation and access. 2 # metadata.py -- code related to various metadata computation and access.
2 # 3 #
3 # Copyright 2019 Google, Inc <martinvonz@google.com> 4 # Copyright 2019 Google, Inc <martinvonz@google.com>
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net> 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # 6 #
236 # In the wild, one can encounter changeset where p1 is null but p2 is not 237 # In the wild, one can encounter changeset where p1 is null but p2 is not
237 return _process_linear(p1, ctx, parent=2) 238 return _process_linear(p1, ctx, parent=2)
238 elif p1.rev() == p2.rev(): 239 elif p1.rev() == p2.rev():
239 # In the wild, one can encounter such "non-merge" 240 # In the wild, one can encounter such "non-merge"
240 return _process_linear(p1, ctx) 241 return _process_linear(p1, ctx)
241 filescopies = computechangesetcopies(ctx) 242 else:
242 filesadded = computechangesetfilesadded(ctx) 243 return _process_merge(p1, p2, ctx)
243 filesremoved = computechangesetfilesremoved(ctx)
244 filesmerged = computechangesetfilesmerged(ctx)
245 files = ChangingFiles()
246 files.update_touched(ctx.files())
247 files.update_added(filesadded)
248 files.update_removed(filesremoved)
249 files.update_merged(filesmerged)
250 files.update_copies_from_p1(filescopies[0])
251 files.update_copies_from_p2(filescopies[1])
252 return files
253 244
254 245
255 def _process_root(ctx): 246 def _process_root(ctx):
256 """compute the appropriate changed files for a changeset with no parents 247 """compute the appropriate changed files for a changeset with no parents
257 """ 248 """
297 if copy_info: 288 if copy_info:
298 source, srcnode = copy_info 289 source, srcnode = copy_info
299 copied(source, filename) 290 copied(source, filename)
300 291
301 return md 292 return md
293
294
def _process_merge(p1_ctx, p2_ctx, ctx):
    """compute the appropriate changed files for a changeset with two parents

    `p1_ctx` and `p2_ctx` are the contexts of the two parents and `ctx` the
    context of the merge changeset itself. The function returns a
    `ChangingFiles` object filled with the files touched by the merge.

    This is a more advanced case. The information we need to record is
    summarised in the following table, where each diff entry is written as
    (node in the parent, node in the merge result):

    ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
    │ diff ╲  diff │       ø      │ (Some, None) │ (None, Some) │ (Some, Some) │
    │  p2   ╲  p1  │              │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │🄱  No Changes │🄳  No Changes │              │
    │  ø           │🄰  No Changes │      OR      │     OR       │🄵  No Changes │
    │              │              │🄲  Deleted[1] │🄴  Salvaged[2]│     [3]      │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄶  No Changes │              │              │              │
    │ (Some, None) │      OR      │🄻  Deleted    │       ø      │      ø       │
    │              │🄷  Deleted[1] │              │              │              │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │🄸  No Changes │              │              │              │
    │ (None, Some) │     OR       │      ø       │🄼   Added     │🄽  Merged     │
    │              │🄹  Salvaged[2]│              │  (copied?)   │  (copied?)   │
    ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤
    │              │              │              │              │              │
    │ (Some, Some) │🄺  No Changes │      ø       │🄾  Merged     │🄿  Merged     │
    │              │    [3]       │              │  (copied?)   │  (copied?)   │
    └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

    Special case [1]:

      The situation is:
        - parent-A:     file exists,
        - parent-B:     no file,
        - working-copy: no file.

      Detecting a "deletion" will depend on the presence of actual change on
      the "parent-A" branch:

      Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged
      compared to the merge ancestors, then the parent-A branch left the file
      untouched while parent-B deleted it. We simply apply the change from
      the "parent-B" branch: the file was automatically dropped.
      The result is:
          - file is not recorded as touched by the merge.

      Subcase 🄲 or 🄷 : otherwise, the changes from the parent-A branch were
      explicitly dropped and the file was "deleted again". From a user
      perspective, the message about "locally changed" while "remotely
      deleted" (or the other way around) was issued and the user chose to
      delete the file.
      The result:
          - file is recorded as touched by the merge.


    Special case [2]:

      The situation is:
        - parent-A:     no file,
        - parent-B:     file,
        - working-copy: file (same content as parent-B).

      There are three subcases depending on the ancestors contents:

      - A) the file is missing in all ancestors,
      - B) at least one ancestor has the file with filenode ≠ from parent-B,
      - C) all ancestors use the same filenode as parent-B,

      Subcase (A) is the simplest, nothing happened on the parent-A side
      while parent-B added it.

        The result:
            - the file is not marked as touched by the merge.

      Subcase (B) is the counterpart of "Special case [1]", the file was
      modified on the parent-B side, while the parent-A side deleted it.
      However this time, the conflict was solved by keeping the file (and its
      modification). We consider the file as "salvaged".

        The result:
            - the file is marked as "salvaged" by the merge.

      Subcase (C) is a subtle variation of the case above. In this case, the
      file is unchanged on the parent-B side and actively removed on the
      parent-A side. So the merge machinery correctly decides it should be
      removed. However, the file was explicitly restored to its parent-B
      content before the merge was committed. The file is marked as salvaged
      too. From the merge result perspective, this is similar to Subcase (B),
      however from the merge resolution perspective they differ since in (C)
      there was no conflict, just an obvious solution to the merge (that got
      reversed).

    Special case [3]:

      The situation is:
        - parent-A:     file,
        - parent-B:     file (different filenode as parent-A),
        - working-copy: file (same filenode as parent-B).

      This case is in theory much simpler: for this to happen, the filenode
      in parent-B must purely replace the one in parent-A (either a
      descendant, or a completely new file history). So the merge introduces
      no changes, and the file is not affected by the merge...

      However, in the wild it is possible to find commits for which the above
      is not true. For example some repositories have commits where the *new*
      node is an ancestor of the node in parent-A, or where parent-A and
      parent-B are two branches of the same file history, yet no
      merge-filenode was created (while the "merge" should have led to a
      "modification").

      Detecting such cases (and not recording the file as modified) would be
      a nice bonus. However we do not do any of this yet.
    """

    md = ChangingFiles()

    repo = ctx.repo()
    m = ctx.manifest()
    p1m = p1_ctx.manifest()
    p2m = p2_ctx.manifest()
    diff_p1 = p1m.diff(m)
    diff_p2 = p2m.diff(m)

    # manifests of the merge ancestors, needed by the special cases [1] and
    # [2] documented above
    cahs = repo.changelog.commonancestorsheads(p1_ctx.node(), p2_ctx.node())
    if not cahs:
        cahs = [node.nullrev]
    mas = [repo[r].manifest() for r in cahs]

    copy_candidates = []

    # Dealing with case 🄰 happens automatically. Since there are no entry in
    # d1 nor d2, we won't iterate on it ever.

    # Iteration over d1 content will deal with all cases, but the one in the
    # first column of the table.
    for filename, d1 in diff_p1.items():

        # entries consumed here are removed from diff_p2, so that only the
        # first column of the table remains in it after this loop.
        d2 = diff_p2.pop(filename, None)

        if d2 is None:
            # this deals with the first line of the table.
            _process_other_unchanged(md, mas, filename, d1)
        elif d1[0][0] is None and d2[0][0] is None:
            # case 🄼 — the file is missing from both parents: it is added
            # by the merge.
            md.mark_added(filename)
            copy_candidates.append(filename)
        elif d1[1][0] is None and d2[1][0] is None:
            # case 🄻 — the file exists in both parents and is gone from
            # the merge result.
            md.mark_removed(filename)
        elif d1[1][0] is not None and d2[1][0] is not None:
            # case 🄽 🄾 🄿
            md.mark_merged(filename)
            copy_candidates.append(filename)
        else:
            # Impossible case, the post-merge file status cannot be None on
            # one side and Something on the other side.
            #
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError("unreachable")

    # Iteration over remaining d2 content deals with the first column of the
    # table.
    for filename, d2 in diff_p2.items():
        _process_other_unchanged(md, mas, filename, d2)

    # Record copy information for added/merged files, attributing the copy
    # to the parent that holds the exact source filenode (p1 is checked
    # first).
    for filename in copy_candidates:
        copy_info = ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p1(source, filename)
            elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p2(source, filename)
    return md
467
468
469 def _find(manifest, filename):
470 """return the associate filenode or None"""
471 if filename not in manifest:
472 return None
473 return manifest.find(filename)[0]
474
475
def _process_other_unchanged(md, mas, filename, diff):
    """record the change for a file that differs from only one merge parent

    - `md` is the object the result is recorded in (through `mark_removed`
      and `mark_salvaged`),
    - `mas` is the list of manifests of the merge ancestors,
    - `filename` is the file being processed,
    - `diff` is the diff entry of that file between the parent it differs
      from and the merge result; `diff[0][0]` is the node on the parent side
      and `diff[1][0]` the node in the merge result.

    Nothing is returned, `md` is updated in place.
    """
    source_node = diff[0][0]
    target_node = diff[1][0]

    if source_node is not None and target_node is None:
        if any(_find(ma, filename) != source_node for ma in mas):
            # case 🄲 or 🄷
            md.mark_removed(filename)
        # else, we have case 🄱 or 🄶 : no change need to be recorded
    elif source_node is None and target_node is not None:
        if not _missing_from_all_ancestors(mas, filename):
            # case 🄴 or 🄹
            md.mark_salvaged(filename)
        # else, we have case 🄳 or 🄸 : simple merge without intervention
    elif source_node is not None and target_node is not None:
        # case 🄵 or 🄺 : simple merge without intervention
        #
        # In buggy case where source_node is not an ancestors of target_node.
        # There should have a been a new filenode created, recording this as
        # "modified". We do not deal with them yet.
        pass
    else:
        # An impossible case, the diff algorithm should not return entry if the
        # file is missing on both side.
        #
        # Raised explicitly (instead of `assert False`) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("unreachable")
501
502
def _missing_from_all_ancestors(mas, filename):
    """return True if none of the manifests in `mas` contains `filename`

    An empty `mas` yields True.
    """
    for ancestor_manifest in mas:
        if _find(ancestor_manifest, filename) is not None:
            return False
    return True
302 505
303 506
304 def computechangesetfilesadded(ctx): 507 def computechangesetfilesadded(ctx):
305 """return the list of files added in a changeset 508 """return the list of files added in a changeset
306 """ 509 """