changeset 52081:a021da4ec509

merge: add a config to allow conflict-free merge of changes on adjacent lines This change adds a config to make it no longer a conflict to merge changes made on adjacent lines. The reason these changes are considered a conflict is that there's no region of text at the relevant position (sync region) that's kept unchanged by both sides of the merge. The problem can be solved by making the sync regions being a bit more powerful: we can keep a 0-length sync region if we find that a block unchanged by one side is ajacent to a block unchanged by the other side. Since these 0-length sync regions are emitted using the ~same algorithm as the normal non-empty sync regions, this change involves no arbitrary decisions and I expect it to work pretty well. 0-length sync regions do create an ambiguity in a special case where two pairs of adjacent regions "meet" at the same point. This corresponds to an insertion made at the same place by the two sides of the merge, and this still results in a conflict.
author Arseniy Alekseyev <aalekseyev@janestreet.com
date Thu, 24 Oct 2024 17:35:53 +0200
parents 513b413702e8
children 1b4e96420a3c
files mercurial/configitems.toml mercurial/debugcommands.py mercurial/filemerge.py mercurial/simplemerge.py tests/test-merge-relaxed-block-sync.t
diffstat 5 files changed, 264 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/configitems.toml	Wed Oct 23 17:08:57 2024 -0400
+++ b/mercurial/configitems.toml	Thu Oct 24 17:35:53 2024 +0200
@@ -1530,6 +1530,24 @@
 default = false
 
 [[items]]
+section = "experimental"
+name = "relaxed-block-sync-merge"
+default = false
+documentation="""When using built-in simple merge tools, this config makes it so that changes
+touching adjacent file regions no longer conflict with each other.
+
+In particular, addition/modification/removal adjacent to modification/removal
+are all allowed with no conflict.
+
+Addition next to addition is still treated as a conflict because it presents
+a legitimate ambiguity.
+
+The change tweaks existing logic for aligning file changes, making it so
+that a 0-length spacing between regions is just as good as a 1-line spacing.
+(default: False)
+"""
+
+[[items]]
 section = "merge-tools"
 name = ".*"
 generic = true
--- a/mercurial/debugcommands.py	Wed Oct 23 17:08:57 2024 -0400
+++ b/mercurial/debugcommands.py	Thu Oct 24 17:35:53 2024 +0200
@@ -255,6 +255,10 @@
     progress = ui.makeprogress(
         _(b'building'), unit=_(b'revisions'), total=total
     )
+    merge_relaxed_sync = ui.configbool(
+        b'experimental',
+        b'relaxed-block-sync-merge',
+    )
     with progress, repo.wlock(), repo.lock(), repo.transaction(b"builddag"):
         at = -1
         atbranch = b'default'
@@ -279,7 +283,12 @@
                         base, local, other = [
                             x[fn].data() for x in (pa, p1, p2)
                         ]
-                        m3 = simplemerge.Merge3Text(base, local, other)
+                        m3 = simplemerge.Merge3Text(
+                            base,
+                            local,
+                            other,
+                            relaxed_sync=merge_relaxed_sync,
+                        )
                         ml = [
                             l.strip()
                             for l in simplemerge.render_minimized(m3)[0]
--- a/mercurial/filemerge.py	Wed Oct 23 17:08:57 2024 -0400
+++ b/mercurial/filemerge.py	Thu Oct 24 17:35:53 2024 +0200
@@ -481,6 +481,8 @@
     suppresses the markers."""
     ui = repo.ui
 
+    relaxed_sync = ui.configbool(b'experimental', b'relaxed-block-sync-merge')
+
     try:
         _verifytext(local, ui)
         _verifytext(base, ui)
@@ -489,7 +491,11 @@
         return True, True, False
     else:
         merged_text, conflicts = simplemerge.simplemerge(
-            local, base, other, mode=mode
+            local,
+            base,
+            other,
+            mode=mode,
+            relaxed_sync=relaxed_sync,
         )
         # fcd.flags() already has the merged flags (done in
         # mergestate.resolve())
--- a/mercurial/simplemerge.py	Wed Oct 23 17:08:57 2024 -0400
+++ b/mercurial/simplemerge.py	Thu Oct 24 17:35:53 2024 +0200
@@ -49,6 +49,30 @@
         return None
 
 
+def intersect_or_touch(ra, rb):
+    """Given two ranges return the range where they intersect or touch or None.
+
+    >>> intersect_or_touch((0, 10), (0, 6))
+    (0, 6)
+    >>> intersect_or_touch((0, 10), (5, 15))
+    (5, 10)
+    >>> intersect_or_touch((0, 10), (10, 15))
+    (10, 10)
+    >>> intersect_or_touch((0, 9), (10, 15))
+    >>> intersect_or_touch((0, 9), (7, 15))
+    (7, 9)
+    """
+    assert ra[0] <= ra[1]
+    assert rb[0] <= rb[1]
+
+    sa = max(ra[0], rb[0])
+    sb = min(ra[1], rb[1])
+    if sa <= sb:
+        return sa, sb
+    else:
+        return None
+
+
 def compare_range(a, astart, aend, b, bstart, bend):
     """Compare a[astart:aend] == b[bstart:bend], without slicing."""
     if (aend - astart) != (bend - bstart):
@@ -66,7 +90,16 @@
     Given strings BASE, OTHER, THIS, tries to produce a combined text
     incorporating the changes from both BASE->OTHER and BASE->THIS."""
 
-    def __init__(self, basetext, atext, btext, base=None, a=None, b=None):
+    def __init__(
+        self,
+        basetext,
+        atext,
+        btext,
+        base=None,
+        a=None,
+        b=None,
+        relaxed_sync=False,
+    ):
         self.basetext = basetext
         self.atext = atext
         self.btext = btext
@@ -76,6 +109,7 @@
             a = mdiff.splitnewlines(atext)
         if b is None:
             b = mdiff.splitnewlines(btext)
+        self.relaxed_sync = relaxed_sync
         self.base = base
         self.a = a
         self.b = b
@@ -220,6 +254,11 @@
         len_a = len(amatches)
         len_b = len(bmatches)
 
+        if self.relaxed_sync:
+            intersect_fun = intersect_or_touch
+        else:
+            intersect_fun = intersect
+
         sl = []
 
         while ia < len_a and ib < len_b:
@@ -228,7 +267,7 @@
 
             # there is an unconflicted block at i; how long does it
             # extend?  until whichever one ends earlier.
-            i = intersect((abase, abase + alen), (bbase, bbase + blen))
+            i = intersect_fun((abase, abase + alen), (bbase, bbase + blen))
             if i:
                 intbase = i[0]
                 intend = i[1]
@@ -258,13 +297,41 @@
             # advance whichever one ends first in the base text
             if (abase + alen) < (bbase + blen):
                 ia += 1
+            elif not self.relaxed_sync:
+                # if the blocks end at the same time we know they can't overlap
+                # any other block, so no need for the complicated checks below
+                ib += 1
+            elif (abase + alen) > (bbase + blen):
+                ib += 1
             else:
-                ib += 1
+                # If both end at the same time, either may touching the
+                # follow-up matching block on the other side.
+                # Advance the one whose next block comes sooner.
+                if ia + 1 == len_a:
+                    # if we run out of blocks on A side, we may as well advance B
+                    # since there's nothing on A side for that to touch
+                    ib += 1
+                elif ib + 1 == len_b:
+                    ia += 1
+                elif amatches[ia + 1][0] > bmatches[ib + 1][0]:
+                    ib += 1
+                elif amatches[ia + 1][0] < bmatches[ib + 1][0]:
+                    ia += 1
+                else:
+                    # symmetric situation: both sides added lines to the same place
+                    # it's less surprising if we treat it as a conflict, so skip
+                    # both without a preferred order
+                    ia += 1
+                    ib += 1
 
         intbase = len(self.base)
         abase = len(self.a)
         bbase = len(self.b)
-        sl.append((intbase, intbase, abase, abase, bbase, bbase))
+        sentinel_hunk = (intbase, intbase, abase, abase, bbase, bbase)
+        # we avoid duplicate sentinel hunk at the end to make the
+        # test output cleaner
+        if not (sl and sl[len(sl) - 1] == sentinel_hunk):
+            sl.append(sentinel_hunk)
 
         return sl
 
@@ -498,6 +565,7 @@
     other,
     mode=b'merge',
     allow_binary=False,
+    relaxed_sync=False,
 ):
     """Performs the simplemerge algorithm.
 
@@ -509,7 +577,9 @@
         _verifytext(base)
         _verifytext(other)
 
-    m3 = Merge3Text(base.text(), local.text(), other.text())
+    m3 = Merge3Text(
+        base.text(), local.text(), other.text(), relaxed_sync=relaxed_sync
+    )
     conflicts = False
     if mode == b'union':
         lines = _resolve(m3, (1, 2))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-merge-relaxed-block-sync.t	Thu Oct 24 17:35:53 2024 +0200
@@ -0,0 +1,154 @@
+==============================================
+Test merge algorithm with "relaxed block sync"
+==============================================
+
+Setup
+=====
+
+  $ cat >> $HGRCPATH << EOF
+  > [experimental]
+  > relaxed-block-sync-merge=yes
+  > [ui]
+  > merge=:merge3
+  > EOF
+  $ unset HGMERGE
+
+  $ hg init repo
+  $ cd repo
+
+  $ m=../scratch
+  $ mkdir "$m"
+
+# For the purpose of this test, we use a file [listing] that has one line
+# per file of [scratch] directory.
+# This way, the patches can be represented as bash scripts.
+#
+# Adding a line is then just "touch", removing a line is "rm", and
+# modifying a line is "echo modfied > file1".
+
+# Make_change takes a "patch script", as described above, and
+# produces a file [listing] with the coresponding contents
+# past applying the patch to a fixed base state.
+  $ make_change() {
+  >   cmd=$1
+  >    rm -r ../scratch
+  >    mkdir ../scratch
+  >    (cat listing 2>/dev/null || true) | while IFS=' :' read k v; do echo "$v" > ../scratch/"$k"; done
+  >   
+  >   (
+  >    cd ../scratch
+  >    eval "$cmd" >&2
+  >    for f in *; do val=$(cat "$f"); printf "$f: $val\n"; done) > listing
+  > }
+
+# mk_rev takes a [base] and a patch, and produces a child revision of [base]
+# corresponding to that patch.
+  $ mk_rev() {
+  >   base=$1
+  >   cmd=$2
+  >   (hg update -C "$base" -q
+  >   make_change "$cmd"
+  >   (hg commit -qAm _ 2>&1) | grep -v 'commit already existed') >&2
+  >   hg log -r . -T '{rev}'
+  > }
+
+  $ test() {
+  >   cmd1=$1
+  >   cmd2=$2
+  >   r2=$(mk_rev 0 "$cmd2")
+  >   r1=$(mk_rev 0 "$cmd1")
+  >   # already at r1
+  >   hg merge -q "$r2"
+  >   cat listing
+  > }
+
+  $ rev0=$(mk_rev 'rev(-1)' 'echo val1 > key1; echo val2 > key2; echo val3 > key3; ')
+  $ cat listing
+  key1: val1
+  key2: val2
+  key3: val3
+
+Actual testing
+==============
+
+easy merge: no need for relaxed block sync:
+-------------------------------------------
+
+  $ test 'echo modified1 > key1' 'echo modified3 > key3'
+  key1: modified1
+  key2: val2
+  key3: modified3
+
+Add adjacent to modify:
+-----------------------
+
+  $ test 'echo modified > key3' 'echo val4 > key4'
+  key1: val1
+  key2: val2
+  key3: modified
+  key4: val4
+
+Modify adjacent to modify:
+--------------------------
+
+  $ test 'echo modified3 > key3' 'echo modified2 > key2'
+  key1: val1
+  key2: modified2
+  key3: modified3
+
+Remove adjacent to modify:
+--------------------------
+
+  $ test 'rm key2' 'echo modified > key1'
+  key1: modified
+  key3: val3
+
+Add adjacent to remove:
+-----------------------
+
+  $ test 'rm key2' 'touch key1a'
+  key1: val1
+  key1a: 
+  key3: val3
+
+Remove adjacent to remove:
+--------------------------
+
+  $ test 'rm key2' 'rm key1'
+  key3: val3
+
+It even works if you're sandwiched between additions above and below:
+
+  $ test 'echo val-changed-3 > key3' 'touch key2a; touch key4'
+  key1: val1
+  key2: val2
+  key2a: 
+  key3: val-changed-3
+  key4: 
+
+Add adjacent to add:
+--------------------
+
+Add adjacent to add is still disallowed because we don't  know what order to add
+lines in:
+
+  $ test 'touch key1a' 'touch key1b'
+  warning: conflicts while merging listing! (edit, then use 'hg resolve --mark')
+  key1: val1
+  <<<<<<< working copy:    744662bcc33a - test: _
+  key1a: 
+  ||||||| common ancestor: b1791e356cd4 - test: _
+  =======
+  key1b: 
+  >>>>>>> merge rev:       06735b47f956 - test: _
+  key2: val2
+  key3: val3
+
+Add kinda-adjacent to add can still work if there's an
+adjacent line that helps resolve the order ambiguity:
+
+  $ test 'touch key1a; rm key2' 'touch key2a'
+  key1: val1
+  key1a: 
+  key2a: 
+  key3: val3