Mercurial > hg
comparison mercurial/metadata.py @ 45668:47ad23549b81
changing-files: add clean computation of changed file for merges
This is "a tad more complicated" than the previous cases. See inline
documentation for details (have fun).
Differential Revision: https://phab.mercurial-scm.org/D9128
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 29 Sep 2020 22:47:54 +0200 |
parents | 0303fc1f43f8 |
children | e53778ad64bf |
comparison
equal
deleted
inserted
replaced
45667:0303fc1f43f8 | 45668:47ad23549b81 |
---|---|
1 # coding: utf8 | |
1 # metadata.py -- code related to various metadata computation and access. | 2 # metadata.py -- code related to various metadata computation and access. |
2 # | 3 # |
3 # Copyright 2019 Google, Inc <martinvonz@google.com> | 4 # Copyright 2019 Google, Inc <martinvonz@google.com> |
4 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net> | 5 # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net> |
5 # | 6 # |
236 # In the wild, one can encounter changeset where p1 is null but p2 is not | 237 # In the wild, one can encounter changeset where p1 is null but p2 is not |
237 return _process_linear(p1, ctx, parent=2) | 238 return _process_linear(p1, ctx, parent=2) |
238 elif p1.rev() == p2.rev(): | 239 elif p1.rev() == p2.rev(): |
239 # In the wild, one can encounter such "non-merge" | 240 # In the wild, one can encounter such "non-merge" |
240 return _process_linear(p1, ctx) | 241 return _process_linear(p1, ctx) |
241 filescopies = computechangesetcopies(ctx) | 242 else: |
242 filesadded = computechangesetfilesadded(ctx) | 243 return _process_merge(p1, p2, ctx) |
243 filesremoved = computechangesetfilesremoved(ctx) | |
244 filesmerged = computechangesetfilesmerged(ctx) | |
245 files = ChangingFiles() | |
246 files.update_touched(ctx.files()) | |
247 files.update_added(filesadded) | |
248 files.update_removed(filesremoved) | |
249 files.update_merged(filesmerged) | |
250 files.update_copies_from_p1(filescopies[0]) | |
251 files.update_copies_from_p2(filescopies[1]) | |
252 return files | |
253 | 244 |
254 | 245 |
255 def _process_root(ctx): | 246 def _process_root(ctx): |
256 """compute the appropriate changed files for a changeset with no parents | 247 """compute the appropriate changed files for a changeset with no parents |
257 """ | 248 """ |
297 if copy_info: | 288 if copy_info: |
298 source, srcnode = copy_info | 289 source, srcnode = copy_info |
299 copied(source, filename) | 290 copied(source, filename) |
300 | 291 |
301 return md | 292 return md |
293 | |
294 | |
295 def _process_merge(p1_ctx, p2_ctx, ctx): | |
296 """compute the appropriate changed files for a changeset with two parents | |
297 | |
298 This is a more advanced case. The information we need to record is summarised | |
299 in the following table: | |
300 | |
301 ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐ | |
302 │ diff ╲ diff │ ø │ (Some, None) │ (None, Some) │ (Some, Some) │ | |
303 │ p2 ╲ p1 │ │ │ │ │ | |
304 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ | |
305 │ │ │🄱 No Changes │🄳 No Changes │ │ | |
306 │ ø │🄰 No Changes │ OR │ OR │🄵 No Changes │ | |
307 │ │ │🄲 Deleted[1] │🄴 Salvaged[2]│ [3] │ | |
308 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ | |
309 │ │🄶 No Changes │ │ │ │ | |
310 │ (Some, None) │ OR │🄻 Deleted │ ø │ ø │ | |
311 │ │🄷 Deleted[1] │ │ │ │ | |
312 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ | |
313 │ │🄸 No Changes │ │ │ │ | |
314 │ (None, Some) │ OR │ ø │🄼 Added │🄽 Merged │ | |
315 │ │🄹 Salvaged[2]│ │ (copied?) │ (copied?) │ | |
316 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ | |
317 │ │ │ │ │ │ | |
318 │ (Some, Some) │🄺 No Changes │ ø │🄾 Merged │🄿 Merged │ | |
319 │ │ [3] │ │ (copied?) │ (copied?) │ | |
320 └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘ | |
321 | |
322 Special case [1]: | |
323 | |
324 The situation is: | |
325 - parent-A: file exists, | |
326 - parent-B: no file, | |
327 - working-copy: no file. | |
328 | |
329 Detecting a "deletion" will depend on the presence of actual change on | |
330 the "parent-A" branch: | |
331 | |
332 Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged | |
333 compared to the merge ancestors, then parent-A branch left the file | |
334 untouched while parent-B deleted it. We simply apply the change from | |
335 "parent-B" branch: the file was automatically dropped. | |
336 The result is: | |
337 - file is not recorded as touched by the merge. | |
338 | |
339 Subcase 🄲 or 🄷 : otherwise, the changes from parent-A branch were explicitly dropped and | |
340 the file was "deleted again". From a user perspective, the message | |
341 about "locally changed" while "remotely deleted" (or the other way | |
342 around) was issued and the user chose to delete the file. | |
343 The result: | |
344 - file is recorded as touched by the merge. | |
345 | |
346 | |
347 Special case [2]: | |
348 | |
349 The situation is: | |
350 - parent-A: no file, | |
351 - parent-B: file, | |
352 - working-copy: file (same content as parent-B). | |
353 | |
354 There are three subcases depending on the ancestors contents: | |
355 | |
356 - A) the file is missing in all ancestors, | |
357 - B) at least one ancestor has the file with filenode ≠ from parent-B, | |
358 - C) all ancestors use the same filenode as parent-B, | |
359 | |
360 Subcase (A) is the simplest: nothing happened on parent-A side while | |
361 parent-B added it. | |
362 | |
363 The result: | |
364 - the file is not marked as touched by the merge. | |
365 | |
366 Subcase (B) is the counter part of "Special case [1]", the file was | |
367 modified on parent-B side, while parent-A side deleted it. However this | |
368 time, the conflict was solved by keeping the file (and its | |
369 modification). We consider the file as "salvaged". | |
370 | |
371 The result: | |
372 - the file is marked as "salvaged" by the merge. | |
373 | |
374 Subcase (C) is a subtle variation of the case above. In this case, the | |
375 file is unchanged on the parent-B side and actively removed on the | |
376 parent-A side. So the merge machinery correctly decides it should be | |
377 removed. However, the file was explicitly restored to its parent-B | |
378 content before the merge was committed. The file is marked | |
379 as salvaged too. From the merge result perspective, this is similar to | |
380 Subcase (B), however from the merge resolution perspective they differ | |
381 since in (C), there was no conflict but an obvious solution to the | |
382 merge (that got reversed). | |
383 | |
384 Special case [3]: | |
385 | |
386 The situation is: | |
387 - parent-A: file, | |
388 - parent-B: file (different filenode than parent-A), | |
389 - working-copy: file (same filenode as parent-B). | |
390 | |
391 This case is in theory much simpler: for this to happen, the | |
392 filenode in parent-B must purely replace the one in parent-A (either a | |
393 descendant, or a full new file history, see changeset). So the merge | |
394 introduces no changes, and the file is not affected by the merge... | |
395 | |
396 However, in the wild it is possible to find commits where the above is not | |
397 true. For example, some repositories have commits where the *new* node is an | |
398 ancestor of the node in parent-A, or where parent-A and parent-B are two | |
399 branches of the same file history, yet no merge-filenode was created | |
400 (while the "merge" should have led to a "modification"). | |
401 | |
402 Detecting such cases (and not recording the file as modified) would be a | |
403 nice bonus. However, we do not do any of this yet. | |
404 """ | |
405 | |
406 md = ChangingFiles() | |
407 | |
408 m = ctx.manifest() | |
409 p1m = p1_ctx.manifest() | |
410 p2m = p2_ctx.manifest() | |
411 diff_p1 = p1m.diff(m) | |
412 diff_p2 = p2m.diff(m) | |
413 | |
414 cahs = ctx.repo().changelog.commonancestorsheads( | |
415 p1_ctx.node(), p2_ctx.node() | |
416 ) | |
417 if not cahs: | |
418 cahs = [node.nullrev] | |
419 mas = [ctx.repo()[r].manifest() for r in cahs] | |
420 | |
421 copy_candidates = [] | |
422 | |
423 # Dealing with case 🄰 happens automatically. Since there is no entry in | |
424 # d1 nor d2 for such files, we never iterate over them. | |
425 | |
426 # Iteration over d1 content will deal with all cases except the ones in the | |
427 # first column of the table. | |
428 for filename, d1 in diff_p1.items(): | |
429 | |
430 d2 = diff_p2.pop(filename, None) | |
431 | |
432 if d2 is None: | |
433 # this deals with the first row of the table. | |
434 _process_other_unchanged(md, mas, filename, d1) | |
435 else: | |
436 | |
437 if d1[0][0] is None and d2[0][0] is None: | |
438 # case 🄼 — the file is missing from both parents: added by the merge. | |
439 md.mark_added(filename) | |
440 copy_candidates.append(filename) | |
441 elif d1[1][0] is None and d2[1][0] is None: | |
442 # case 🄻 — both deleted the file. | |
443 md.mark_removed(filename) | |
444 elif d1[1][0] is not None and d2[1][0] is not None: | |
445 # case 🄽 🄾 🄿 | |
446 md.mark_merged(filename) | |
447 copy_candidates.append(filename) | |
448 else: | |
449 # Impossible case, the post-merge file status cannot be None on | |
450 # one side and Something on the other side. | |
451 assert False, "unreachable" | |
452 | |
453 # Iteration over remaining d2 content deals with the first column of the | |
454 # table. | |
455 for filename, d2 in diff_p2.items(): | |
456 _process_other_unchanged(md, mas, filename, d2) | |
457 | |
458 for filename in copy_candidates: | |
459 copy_info = ctx[filename].renamed() | |
460 if copy_info: | |
461 source, srcnode = copy_info | |
462 if source in p1_ctx and p1_ctx[source].filenode() == srcnode: | |
463 md.mark_copied_from_p1(source, filename) | |
464 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode: | |
465 md.mark_copied_from_p2(source, filename) | |
466 return md | |
467 | |
468 | |
469 def _find(manifest, filename): | |
470 """return the associated filenode or None""" | |
471 if filename not in manifest: | |
472 return None | |
473 return manifest.find(filename)[0] | |
474 | |
475 | |
476 def _process_other_unchanged(md, mas, filename, diff): | |
477 source_node = diff[0][0] | |
478 target_node = diff[1][0] | |
479 | |
480 if source_node is not None and target_node is None: | |
481 if any(not _find(ma, filename) == source_node for ma in mas): | |
482 # case 🄲 or 🄷 | |
483 md.mark_removed(filename) | |
484 # else, we have case 🄱 or 🄶 : no change needs to be recorded | |
485 elif source_node is None and target_node is not None: | |
486 if any(_find(ma, filename) is not None for ma in mas): | |
487 # case 🄴 or 🄹 | |
488 md.mark_salvaged(filename) | |
489 # else, we have case 🄳 or 🄸 : simple merge without intervention | |
490 elif source_node is not None and target_node is not None: | |
491 # case 🄵 or 🄺 : simple merge without intervention | |
492 # | |
493 # In buggy cases where source_node is not an ancestor of target_node, | |
494 # a new filenode should have been created, recording this as | |
495 # "modified". We do not deal with them yet. | |
496 pass | |
497 else: | |
498 # An impossible case, the diff algorithm should not return an entry if the | |
499 # file is missing on both sides. | |
500 assert False, "unreachable" | |
501 | |
502 | |
503 def _missing_from_all_ancestors(mas, filename): | |
504 return all(_find(ma, filename) is None for ma in mas) | |
302 | 505 |
303 | 506 |
304 def computechangesetfilesadded(ctx): | 507 def computechangesetfilesadded(ctx): |
305 """return the list of files added in a changeset | 508 """return the list of files added in a changeset |
306 """ | 509 """ |