rebase: rewrite core algorithm (issue5578) (issue5630)
authorJun Wu <quark@fb.com>
Thu, 10 Aug 2017 21:30:31 -0700
changeset 33807 0975506120fb
parent 33806 f7d6978a4da9
child 33808 fa3aa6c98bb7
rebase: rewrite core algorithm (issue5578) (issue5630) "defineparents" is the core algorithm of rebase. The old code has too many tech debts (like outdated comments, confusing assertions, etc) and is very error-prone to be improved. This patch rewrites "defineparents" to make the code easier to reason about, and solve a bunch of issues, including: - Assertion error: no base found (demonstrated by D212, issue5578) - Asymmetric result (demonstrated by D211, "F with one parent") - Wrong new parent (demonstrated by D262, "C':A'A'") - "revlog index out of range" (demonstrated by D262, issue5630) - "nothing to merge" (demonstrated by D262) As a side effect, merge base decision has been made more solid - rebase now prints out explicitly what could go wrong when it cannot find a unique suitable merge base. .. fix:: Issue 5578, Issue 5630 Core rebase algorithm has been rewritten to be more robust. Differential Revision: https://phab.mercurial-scm.org/D21
hgext/rebase.py
tests/test-rebase-brute-force.t
tests/test-rebase-newancestor.t
tests/test-rebase-obsolete.t
--- a/hgext/rebase.py	Sat Aug 12 21:40:48 2017 -0700
+++ b/hgext/rebase.py	Thu Aug 10 21:30:31 2017 -0700
@@ -66,7 +66,6 @@
 revprecursor = -4
 # plain prune (no successor)
 revpruned = -5
-revskipped = (revignored, revprecursor, revpruned)
 
 cmdtable = {}
 command = registrar.command(cmdtable)
@@ -390,10 +389,7 @@
                 ui.status(_('rebasing %s\n') % desc)
                 ui.progress(_("rebasing"), pos, ("%d:%s" % (rev, ctx)),
                             _('changesets'), total)
-                p1, p2, base = defineparents(repo, rev, self.dest,
-                                             self.state,
-                                             self.destancestors,
-                                             self.obsoletenotrebased)
+                p1, p2, base = defineparents(repo, rev, self.dest, self.state)
                 self.storestatus(tr=tr)
                 storecollapsemsg(repo, self.collapsemsg)
                 if len(repo[None].parents()) == 2:
@@ -463,9 +459,7 @@
         repo, ui, opts = self.repo, self.ui, self.opts
         if self.collapsef and not self.keepopen:
             p1, p2, _base = defineparents(repo, min(self.state),
-                                          self.dest, self.state,
-                                          self.destancestors,
-                                          self.obsoletenotrebased)
+                                          self.dest, self.state)
             editopt = opts.get('edit')
             editform = 'rebase.collapse'
             if self.collapsemsg:
@@ -960,15 +954,6 @@
         result.append(adjusted)
     return result
 
-def nearestrebased(repo, rev, state):
-    """return the nearest ancestors of rev in the rebase result"""
-    rebased = [r for r in state if state[r] > nullmerge]
-    candidates = repo.revs('max(%ld  and (::%d))', rebased, rev)
-    if candidates:
-        return state[candidates.first()]
-    else:
-        return None
-
 def _checkobsrebase(repo, ui, rebaseobsrevs, rebasesetrevs, rebaseobsskipped):
     """
     Abort if rebase will create divergence or rebase is noop because of markers
@@ -992,107 +977,173 @@
               "experimental.allowdivergence=True")
         raise error.Abort(msg % (",".join(divhashes),), hint=h)
 
-def defineparents(repo, rev, dest, state, destancestors,
-                  obsoletenotrebased):
-    'Return the new parent relationship of the revision that will be rebased'
-    parents = repo[rev].parents()
-    p1 = p2 = nullrev
-    rp1 = None
+def successorrevs(repo, rev):
+    """yield revision numbers for successors of rev"""
+    unfi = repo.unfiltered()
+    nodemap = unfi.changelog.nodemap
+    for s in obsutil.allsuccessors(unfi.obsstore, [unfi[rev].node()]):
+        if s in nodemap:
+            yield nodemap[s]
+
+def defineparents(repo, rev, dest, state):
+    """Return new parents and optionally a merge base for rev being rebased
+
+    The destination specified by "dest" cannot always be used directly because
+    previously rebase result could affect destination. For example,
 
-    p1n = parents[0].rev()
-    if p1n in destancestors:
-        p1 = dest
-    elif p1n in state:
-        if state[p1n] == nullmerge:
-            p1 = dest
-        elif state[p1n] in revskipped:
-            p1 = nearestrebased(repo, p1n, state)
-            if p1 is None:
-                p1 = dest
-        else:
-            p1 = state[p1n]
-    else: # p1n external
-        p1 = dest
-        p2 = p1n
+          D E    rebase -r C+D+E -d B
+          |/     C will be rebased to C'
+        B C      D's new destination will be C' instead of B
+        |/       E's new destination will be C' instead of B
+        A
+
+    The new parents of a merge is slightly more complicated. See the comment
+    block below.
+    """
+    cl = repo.changelog
+    def isancestor(a, b):
+        # take revision numbers instead of nodes
+        if a == b:
+            return True
+        elif a > b:
+            return False
+        return cl.isancestor(cl.node(a), cl.node(b))
+
+    oldps = repo.changelog.parentrevs(rev) # old parents
+    newps = [nullrev, nullrev] # new parents
+    dests = adjustdest(repo, rev, dest, state) # adjusted destinations
+    bases = list(oldps) # merge base candidates, initially just old parents
 
-    if len(parents) == 2 and parents[1].rev() not in destancestors:
-        p2n = parents[1].rev()
-        # interesting second parent
-        if p2n in state:
-            if p1 == dest: # p1n in destancestors or external
-                p1 = state[p2n]
-                if p1 == revprecursor:
-                    rp1 = obsoletenotrebased[p2n]
-            elif state[p2n] in revskipped:
-                p2 = nearestrebased(repo, p2n, state)
-                if p2 is None:
-                    # no ancestors rebased yet, detach
-                    p2 = dest
-            else:
-                p2 = state[p2n]
-        else: # p2n external
-            if p2 != nullrev: # p1n external too => rev is a merged revision
-                raise error.Abort(_('cannot use revision %d as base, result '
-                        'would have 3 parents') % rev)
-            p2 = p2n
-    repo.ui.debug(" future parents are %d and %d\n" %
-                            (repo[rp1 or p1].rev(), repo[p2].rev()))
+    if all(r == nullrev for r in oldps[1:]):
+        # For non-merge changeset, just move p to adjusted dest as requested.
+        newps[0] = dests[0]
+    else:
+        # For merge changeset, if we move p to dests[i] unconditionally, both
+        # parents may change and the end result looks like "the merge loses a
+        # parent", which is a surprise. This is a limit because "--dest" only
+        # accepts one dest per src.
+        #
+        # Therefore, only move p with reasonable conditions (in this order):
+        #   1. use dest, if dest is a descendent of (p or one of p's successors)
+        #   2. use p's rebased result, if p is rebased (state[p] > 0)
+        #
+        # Comparing with adjustdest, the logic here does some additional work:
+        #   1. decide which parents will not be moved towards dest
+        #   2. if the above decision is "no", should a parent still be moved
+        #      because it was rebased?
+        #
+        # For example:
+        #
+        #     C    # "rebase -r C -d D" is an error since none of the parents
+        #    /|    # can be moved. "rebase -r B+C -d D" will move C's parent
+        #   A B D  # B (using rule "2."), since B will be rebased.
+        #
+        # The loop tries to be not rely on the fact that a Mercurial node has
+        # at most 2 parents.
+        for i, p in enumerate(oldps):
+            np = p # new parent
+            if any(isancestor(x, dests[i]) for x in successorrevs(repo, p)):
+                np = dests[i]
+            elif p in state and state[p] > 0:
+                np = state[p]
 
-    if not any(p.rev() in state for p in parents):
-        # Case (1) root changeset of a non-detaching rebase set.
-        # Let the merge mechanism find the base itself.
+            # "bases" only record "special" merge bases that cannot be
+            # calculated from changelog DAG (i.e. isancestor(p, np) is False).
+            # For example:
+            #
+            #   B'   # rebase -s B -d D, when B was rebased to B'. dest for C
+            #   | C  # is B', but merge base for C is B, instead of
+            #   D |  # changelog.ancestor(C, B') == A. If changelog DAG and
+            #   | B  # "state" edges are merged (so there will be an edge from
+            #   |/   # B to B'), the merge base is still ancestor(C, B') in
+            #   A    # the merged graph.
+            #
+            # Also see https://bz.mercurial-scm.org/show_bug.cgi?id=1950#c8
+            # which uses "virtual null merge" to explain this situation.
+            if isancestor(p, np):
+                bases[i] = nullrev
+
+            # If one parent becomes an ancestor of the other, drop the ancestor
+            for j, x in enumerate(newps[:i]):
+                if x == nullrev:
+                    continue
+                if isancestor(np, x):
+                    np = nullrev
+                elif isancestor(x, np):
+                    newps[j] = np
+                    np = nullrev
+                    bases[j], bases[i] = bases[i], bases[j]
+
+            newps[i] = np
+
+        # "rebasenode" updates to new p1, and the old p1 will be used as merge
+        # base. If only p2 changes, merging using unchanged p1 as merge base is
+        # suboptimal. Therefore swap parents to make the merge sane.
+        if newps[1] != nullrev and oldps[0] == newps[0]:
+            assert len(newps) == 2 and len(oldps) == 2
+            newps.reverse()
+            bases.reverse()
+
+        # No parent change might be an error because we fail to make rev a
+        # descendent of requested dest. This can happen, for example:
+        #
+        #     C    # rebase -r C -d D
+        #    /|    # None of A and B will be changed to D and rebase fails.
+        #   A B D
+        if set(newps) == set(oldps) and dest not in newps:
+            # The error message is for compatibility. It's a bit misleading
+            # since rebase is not supposed to add new parents.
+            raise error.Abort(_('cannot use revision %d as base, '
+                                'result would have 3 parents') % rev)
+
+    repo.ui.debug(" future parents are %d and %d\n" % tuple(newps))
+
+    # "rebasenode" updates to new p1, use the corresponding merge base.
+    if bases[0] != nullrev:
+        base = bases[0]
+    else:
         base = None
-    elif not repo[rev].p2():
-        # Case (2) detaching the node with a single parent, use this parent
-        base = repo[rev].p1().rev()
-    else:
-        # Assuming there is a p1, this is the case where there also is a p2.
-        # We are thus rebasing a merge and need to pick the right merge base.
-        #
-        # Imagine we have:
-        # - M: current rebase revision in this step
-        # - A: one parent of M
-        # - B: other parent of M
-        # - D: destination of this merge step (p1 var)
-        #
-        # Consider the case where D is a descendant of A or B and the other is
-        # 'outside'. In this case, the right merge base is the D ancestor.
-        #
-        # An informal proof, assuming A is 'outside' and B is the D ancestor:
-        #
-        # If we pick B as the base, the merge involves:
-        # - changes from B to M (actual changeset payload)
-        # - changes from B to D (induced by rebase) as D is a rebased
-        #   version of B)
-        # Which exactly represent the rebase operation.
-        #
-        # If we pick A as the base, the merge involves:
-        # - changes from A to M (actual changeset payload)
-        # - changes from A to D (with include changes between unrelated A and B
-        #   plus changes induced by rebase)
-        # Which does not represent anything sensible and creates a lot of
-        # conflicts. A is thus not the right choice - B is.
-        #
-        # Note: The base found in this 'proof' is only correct in the specified
-        # case. This base does not make sense if is not D a descendant of A or B
-        # or if the other is not parent 'outside' (especially not if the other
-        # parent has been rebased). The current implementation does not
-        # make it feasible to consider different cases separately. In these
-        # other cases we currently just leave it to the user to correctly
-        # resolve an impossible merge using a wrong ancestor.
-        #
-        # xx, p1 could be -4, and both parents could probably be -4...
-        for p in repo[rev].parents():
-            if state.get(p.rev()) == p1:
-                base = p.rev()
-                break
-        else: # fallback when base not found
-            base = None
+
+    # Check if the merge will contain unwanted changes. That may happen if
+    # there are multiple special (non-changelog ancestor) merge bases, which
+    # cannot be handled well by the 3-way merge algorithm. For example:
+    #
+    #     F
+    #    /|
+    #   D E  # "rebase -r D+E+F -d Z", when rebasing F, if "D" was chosen
+    #   | |  # as merge base, the difference between D and F will include
+    #   B C  # C, so the rebased F will contain C surprisingly. If "E" was
+    #   |/   #  chosen, the rebased F will contain B.
+    #   A Z
+    #
+    # But our merge base candidates (D and E in above case) could still be
+    # better than the default (ancestor(F, Z) == null). Therefore still
+    # pick one (so choose p1 above).
+    if sum(1 for b in bases if b != nullrev) > 1:
+        assert base is not None
 
-            # Raise because this function is called wrong (see issue 4106)
-            raise AssertionError('no base found to rebase on '
-                                 '(defineparents called wrong)')
-    return rp1 or p1, p2, base
+        # Revisions in the side (not chosen as merge base) branch that might
+        # contain "surprising" contents
+        siderevs = list(repo.revs('((%ld-%d) %% (%d+%d))',
+                                  bases, base, base, dest))
+
+        # If those revisions are covered by rebaseset, the result is good.
+        # A merge in rebaseset would be considered to cover its ancestors.
+        if siderevs:
+            rebaseset = [r for r, d in state.items() if d > 0]
+            merges = [r for r in rebaseset if cl.parentrevs(r)[1] != nullrev]
+            unwantedrevs = list(repo.revs('%ld - (::%ld) - %ld',
+                                          siderevs, merges, rebaseset))
+
+            # For revs not covered, it is worth a warning.
+            if unwantedrevs:
+                repo.ui.warn(
+                    _('warning: rebasing %d:%s may include unwanted changes '
+                      'from %s\n')
+                    % (rev, repo[rev], ', '.join('%d:%s' % (r, repo[r])
+                                                 for r in unwantedrevs)))
+
+    return newps[0], newps[1], base
 
 def isagitpatch(repo, patchname):
     'Return true if the given patch is in git format'
--- a/tests/test-rebase-brute-force.t	Sat Aug 12 21:40:48 2017 -0700
+++ b/tests/test-rebase-brute-force.t	Thu Aug 10 21:30:31 2017 -0700
@@ -31,8 +31,8 @@
     AD: A':Z D':Z
     BD: B':Z D':B'
    ABD: A':Z B':Z D':B'
-    CD: CRASH: revlog index out of range
-   ACD: A':Z C':A'A' D':Z
+    CD: ABORT: cannot use revision 3 as base, result would have 3 parents
+   ACD: A':Z C':A'B D':Z
    BCD: B':Z C':B'A D':B'
   ABCD: A':Z B':Z C':A'B' D':B'
 
@@ -52,4 +52,4 @@
     C: ABORT: cannot use revision 3 as base, result would have 3 parents
    BC: B':Z C':B'A
    AC: 
-  BAC: ABORT: nothing to merge
+  BAC: B':Z C':B'A
--- a/tests/test-rebase-newancestor.t	Sat Aug 12 21:40:48 2017 -0700
+++ b/tests/test-rebase-newancestor.t	Thu Aug 10 21:30:31 2017 -0700
@@ -3,7 +3,7 @@
   > usegeneraldelta=yes
   > [extensions]
   > rebase=
-  > 
+  > drawdag=$TESTDIR/drawdag.py
   > [alias]
   > tglog = log -G --template "{rev}: '{desc}' {branches}\n"
   > EOF
@@ -334,3 +334,101 @@
   |/
   o  0: 'common'
   
+Due to the limitation of 3-way merge algorithm (1 merge base), rebasing a merge
+may include unwanted content:
+
+  $ hg init $TESTTMP/dual-merge-base1
+  $ cd $TESTTMP/dual-merge-base1
+  $ hg debugdrawdag <<'EOS'
+  >   F
+  >  /|
+  > D E
+  > | |
+  > B C
+  > |/
+  > A Z
+  > |/
+  > R
+  > EOS
+  $ hg rebase -r D+E+F -d Z
+  rebasing 5:5f2c926dfecf "D" (D)
+  rebasing 6:b296604d9846 "E" (E)
+  rebasing 7:caa9781e507d "F" (F tip)
+  warning: rebasing 7:caa9781e507d may include unwanted changes from 4:d6003a550c2c
+  saved backup bundle to $TESTTMP/dual-merge-base1/.hg/strip-backup/b296604d9846-0516f6d2-rebase.hg (glob)
+  $ hg log -r 4 -T '{files}\n'
+  C
+  $ hg manifest -r 'desc(F)'
+  C
+  D
+  E
+  R
+  Z
+
+The warning does not get printed if there is no unwanted change detected:
+
+  $ hg init $TESTTMP/dual-merge-base2
+  $ cd $TESTTMP/dual-merge-base2
+  $ hg debugdrawdag <<'EOS'
+  >   D
+  >  /|
+  > B C
+  > |/
+  > A Z
+  > |/
+  > R
+  > EOS
+  $ hg rebase -r B+C+D -d Z
+  rebasing 3:c1e6b162678d "B" (B)
+  rebasing 4:d6003a550c2c "C" (C)
+  rebasing 5:c8f78076273e "D" (D tip)
+  saved backup bundle to $TESTTMP/dual-merge-base2/.hg/strip-backup/d6003a550c2c-6f1424b6-rebase.hg (glob)
+  $ hg manifest -r 'desc(D)'
+  B
+  C
+  R
+  Z
+
+The merge base could be different from old p1 (changed parent becomes new p1):
+
+  $ hg init $TESTTMP/chosen-merge-base1
+  $ cd $TESTTMP/chosen-merge-base1
+  $ hg debugdrawdag <<'EOS'
+  >   F
+  >  /|
+  > D E
+  > | |
+  > B C Z
+  > EOS
+  $ hg rebase -r D+F -d Z
+  rebasing 3:004dc1679908 "D" (D)
+  rebasing 5:4be4cbf6f206 "F" (F tip)
+  saved backup bundle to $TESTTMP/chosen-merge-base1/.hg/strip-backup/004dc1679908-06a66a3c-rebase.hg (glob)
+  $ hg manifest -r 'desc(F)'
+  C
+  D
+  E
+  Z
+  $ hg log -r `hg log -r 'desc(F)' -T '{p1node}'` -T '{desc}\n'
+  D
+
+  $ hg init $TESTTMP/chosen-merge-base2
+  $ cd $TESTTMP/chosen-merge-base2
+  $ hg debugdrawdag <<'EOS'
+  >   F
+  >  /|
+  > D E
+  > | |
+  > B C Z
+  > EOS
+  $ hg rebase -r E+F -d Z
+  rebasing 4:974e4943c210 "E" (E)
+  rebasing 5:4be4cbf6f206 "F" (F tip)
+  saved backup bundle to $TESTTMP/chosen-merge-base2/.hg/strip-backup/974e4943c210-b2874da5-rebase.hg (glob)
+  $ hg manifest -r 'desc(F)'
+  B
+  D
+  E
+  Z
+  $ hg log -r `hg log -r 'desc(F)' -T '{p1node}'` -T '{desc}\n'
+  E
--- a/tests/test-rebase-obsolete.t	Sat Aug 12 21:40:48 2017 -0700
+++ b/tests/test-rebase-obsolete.t	Thu Aug 10 21:30:31 2017 -0700
@@ -488,9 +488,14 @@
   >   A
   > EOF
 
-BROKEN: This raises an exception
-  $ hg rebase -d G -r 'B + D + F' 2>&1 | grep '^AssertionError'
-  AssertionError: no base found to rebase on (defineparents called wrong)
+  $ hg rebase -d G -r 'B + D + F'
+  rebasing 1:112478962961 "B" (B)
+  rebasing 2:b18e25de2cf5 "D" (D)
+  not rebasing ignored 4:26805aba1e60 "C" (C)
+  not rebasing ignored 5:4b61ff5c62e2 "E" (E)
+  rebasing 6:f15c3adaf214 "F" (F tip)
+  abort: cannot use revision 6 as base, result would have 3 parents
+  [255]
 
   $ cd ..
 
@@ -962,17 +967,19 @@
   >   A
   > EOF
 
-BROKEN: Raises an exception
-  $ hg rebase -d B -s E 2>&1 | grep AssertionError:
-  AssertionError: no base found to rebase on (defineparents called wrong)
+  $ hg rebase -d B -s E
+  note: not rebasing 3:7fb047a69f22 "E" (E), already in destination as 1:112478962961 "B"
+  rebasing 4:66f1a38021c9 "F" (F tip)
   $ hg log -G
-  o    4:66f1a38021c9 F
+  o    5:aae1787dacee F
   |\
-  | x  3:7fb047a69f22 E
-  | |
-  o |  2:b18e25de2cf5 D
-  |/
-  | o  1:112478962961 B
+  | | x  4:66f1a38021c9 F
+  | |/|
+  | | x  3:7fb047a69f22 E
+  | | |
+  | o |  2:b18e25de2cf5 D
+  | |/
+  o /  1:112478962961 B
   |/
   o  0:426bada5c675 A
   
@@ -994,19 +1001,19 @@
   $ hg rebase -d C -s D
   note: not rebasing 2:b18e25de2cf5 "D" (D), already in destination as 1:112478962961 "B"
   rebasing 5:66f1a38021c9 "F" (F tip)
-BROKEN: not rebased on top of requested destination (C)
+
   $ hg log -G
-  o    6:50e9d60b99c6 F
+  o    6:0913febf6439 F
   |\
-  | | x  5:66f1a38021c9 F
-  | |/|
-  +-----o  4:26805aba1e60 C
+  +---x  5:66f1a38021c9 F
+  | | |
+  | o |  4:26805aba1e60 C
   | | |
-  | o |  3:7fb047a69f22 E
+  o | |  3:7fb047a69f22 E
   | | |
-  | | x  2:b18e25de2cf5 D
-  | |/
-  o |  1:112478962961 B
+  +---x  2:b18e25de2cf5 D
+  | |
+  | o  1:112478962961 B
   |/
   o  0:426bada5c675 A
   
@@ -1025,19 +1032,21 @@
   >   A
   > EOF
 
-BROKEN: Raises an exception
-  $ hg rebase -d C -s E 2>&1 | grep AssertionError:
-  AssertionError: no base found to rebase on (defineparents called wrong)
+  $ hg rebase -d C -s E
+  note: not rebasing 3:7fb047a69f22 "E" (E), already in destination as 1:112478962961 "B"
+  rebasing 5:66f1a38021c9 "F" (F tip)
   $ hg log -G
-  o    5:66f1a38021c9 F
+  o    6:c6ab0cc6d220 F
   |\
-  | | o  4:26805aba1e60 C
+  +---x  5:66f1a38021c9 F
   | | |
-  | x |  3:7fb047a69f22 E
+  | o |  4:26805aba1e60 C
   | | |
-  o | |  2:b18e25de2cf5 D
-  |/ /
-  | o  1:112478962961 B
+  | | x  3:7fb047a69f22 E
+  | | |
+  o---+  2:b18e25de2cf5 D
+   / /
+  o /  1:112478962961 B
   |/
   o  0:426bada5c675 A
   
@@ -1060,6 +1069,7 @@
   rebasing 2:b18e25de2cf5 "D" (D)
   note: not rebasing 3:7fb047a69f22 "E" (E), already in destination as 1:112478962961 "B"
   rebasing 5:66f1a38021c9 "F" (F tip)
+  warning: rebasing 5:66f1a38021c9 may include unwanted changes from 3:7fb047a69f22
   $ hg log -G
   o  7:9ed45af61fa0 F
   |
@@ -1096,13 +1106,13 @@
   note: not rebasing 2:b18e25de2cf5 "D" (D), already in destination as 1:112478962961 "B"
   rebasing 3:7fb047a69f22 "E" (E)
   rebasing 5:66f1a38021c9 "F" (F tip)
-BROKEN: This should have resulted in a rebased F with one parent, just like in
-the test case above
+  warning: rebasing 5:66f1a38021c9 may include unwanted changes from 2:b18e25de2cf5
+
   $ hg log -G
-  o    7:c1e6f26e339d F
-  |\
-  | o  6:533690786a86 E
-  |/
+  o  7:502540f44880 F
+  |
+  o  6:533690786a86 E
+  |
   | x    5:66f1a38021c9 F
   | |\
   o | |  4:26805aba1e60 C