Mercurial > hg
changeset 33786:0975506120fb
rebase: rewrite core algorithm (issue5578) (issue5630)
"defineparents" is the core algorithm of rebase. The old code has too many
tech debts (like outdated comments, confusing assertions, etc) and is very
error-prone to be improved. This patch rewrites "defineparents" to make the
code easier to reason about, and solve a bunch of issues, including:
- Assertion error: no base found (demonstrated by D212, issue5578)
- Asymmetric result (demonstrated by D211, "F with one parent")
- Wrong new parent (demonstrated by D262, "C':A'A'")
- "revlog index out of range" (demonstrated by D262, issue5630)
- "nothing to merge" (demonstrated by D262)
As a side effect, merge base decision has been made more solid - rebase now
prints out explicitly what could go wrong when it cannot find a unique
suitable merge base.
.. fix:: Issue 5578, Issue 5630
Core rebase algorithm has been rewritten to be more robust.
Differential Revision: https://phab.mercurial-scm.org/D21
author | Jun Wu <quark@fb.com> |
---|---|
date | Thu, 10 Aug 2017 21:30:31 -0700 |
parents | f7d6978a4da9 |
children | fa3aa6c98bb7 |
files | hgext/rebase.py tests/test-rebase-brute-force.t tests/test-rebase-newancestor.t tests/test-rebase-obsolete.t |
diffstat | 4 files changed, 312 insertions(+), 153 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/rebase.py Sat Aug 12 21:40:48 2017 -0700 +++ b/hgext/rebase.py Thu Aug 10 21:30:31 2017 -0700 @@ -66,7 +66,6 @@ revprecursor = -4 # plain prune (no successor) revpruned = -5 -revskipped = (revignored, revprecursor, revpruned) cmdtable = {} command = registrar.command(cmdtable) @@ -390,10 +389,7 @@ ui.status(_('rebasing %s\n') % desc) ui.progress(_("rebasing"), pos, ("%d:%s" % (rev, ctx)), _('changesets'), total) - p1, p2, base = defineparents(repo, rev, self.dest, - self.state, - self.destancestors, - self.obsoletenotrebased) + p1, p2, base = defineparents(repo, rev, self.dest, self.state) self.storestatus(tr=tr) storecollapsemsg(repo, self.collapsemsg) if len(repo[None].parents()) == 2: @@ -463,9 +459,7 @@ repo, ui, opts = self.repo, self.ui, self.opts if self.collapsef and not self.keepopen: p1, p2, _base = defineparents(repo, min(self.state), - self.dest, self.state, - self.destancestors, - self.obsoletenotrebased) + self.dest, self.state) editopt = opts.get('edit') editform = 'rebase.collapse' if self.collapsemsg: @@ -960,15 +954,6 @@ result.append(adjusted) return result -def nearestrebased(repo, rev, state): - """return the nearest ancestors of rev in the rebase result""" - rebased = [r for r in state if state[r] > nullmerge] - candidates = repo.revs('max(%ld and (::%d))', rebased, rev) - if candidates: - return state[candidates.first()] - else: - return None - def _checkobsrebase(repo, ui, rebaseobsrevs, rebasesetrevs, rebaseobsskipped): """ Abort if rebase will create divergence or rebase is noop because of markers @@ -992,107 +977,173 @@ "experimental.allowdivergence=True") raise error.Abort(msg % (",".join(divhashes),), hint=h) -def defineparents(repo, rev, dest, state, destancestors, - obsoletenotrebased): - 'Return the new parent relationship of the revision that will be rebased' - parents = repo[rev].parents() - p1 = p2 = nullrev - rp1 = None +def successorrevs(repo, rev): + """yield revision numbers for successors of rev""" + unfi = repo.unfiltered() + nodemap = unfi.changelog.nodemap + for s in obsutil.allsuccessors(unfi.obsstore, [unfi[rev].node()]): + if s in nodemap: + yield nodemap[s] + +def defineparents(repo, rev, dest, state): + """Return new parents and optionally a merge base for rev being rebased + + The destination specified by "dest" cannot always be used directly because + previously rebase result could affect destination. For example, - p1n = parents[0].rev() - if p1n in destancestors: - p1 = dest - elif p1n in state: - if state[p1n] == nullmerge: - p1 = dest - elif state[p1n] in revskipped: - p1 = nearestrebased(repo, p1n, state) - if p1 is None: - p1 = dest - else: - p1 = state[p1n] - else: # p1n external - p1 = dest - p2 = p1n + D E rebase -r C+D+E -d B + |/ C will be rebased to C' + B C D's new destination will be C' instead of B + |/ E's new destination will be C' instead of B + A + + The new parents of a merge is slightly more complicated. See the comment + block below. + """ + cl = repo.changelog + def isancestor(a, b): + # take revision numbers instead of nodes + if a == b: + return True + elif a > b: + return False + return cl.isancestor(cl.node(a), cl.node(b)) + + oldps = repo.changelog.parentrevs(rev) # old parents + newps = [nullrev, nullrev] # new parents + dests = adjustdest(repo, rev, dest, state) # adjusted destinations + bases = list(oldps) # merge base candidates, initially just old parents - if len(parents) == 2 and parents[1].rev() not in destancestors: - p2n = parents[1].rev() - # interesting second parent - if p2n in state: - if p1 == dest: # p1n in destancestors or external - p1 = state[p2n] - if p1 == revprecursor: - rp1 = obsoletenotrebased[p2n] - elif state[p2n] in revskipped: - p2 = nearestrebased(repo, p2n, state) - if p2 is None: - # no ancestors rebased yet, detach - p2 = dest - else: - p2 = state[p2n] - else: # p2n external - if p2 != nullrev: # p1n external too => rev is a merged revision - raise error.Abort(_('cannot use revision %d as base, result ' - 'would have 3 parents') % rev) - p2 = p2n - repo.ui.debug(" future parents are %d and %d\n" % - (repo[rp1 or p1].rev(), repo[p2].rev())) + if all(r == nullrev for r in oldps[1:]): + # For non-merge changeset, just move p to adjusted dest as requested. + newps[0] = dests[0] + else: + # For merge changeset, if we move p to dests[i] unconditionally, both + # parents may change and the end result looks like "the merge loses a + # parent", which is a surprise. This is a limit because "--dest" only + # accepts one dest per src. + # + # Therefore, only move p with reasonable conditions (in this order): + # 1. use dest, if dest is a descendent of (p or one of p's successors) + # 2. use p's rebased result, if p is rebased (state[p] > 0) + # + # Comparing with adjustdest, the logic here does some additional work: + # 1. decide which parents will not be moved towards dest + # 2. if the above decision is "no", should a parent still be moved + # because it was rebased? + # + # For example: + # + # C # "rebase -r C -d D" is an error since none of the parents + # /| # can be moved. "rebase -r B+C -d D" will move C's parent + # A B D # B (using rule "2."), since B will be rebased. + # + # The loop tries to be not rely on the fact that a Mercurial node has + # at most 2 parents. + for i, p in enumerate(oldps): + np = p # new parent + if any(isancestor(x, dests[i]) for x in successorrevs(repo, p)): + np = dests[i] + elif p in state and state[p] > 0: + np = state[p] - if not any(p.rev() in state for p in parents): - # Case (1) root changeset of a non-detaching rebase set. - # Let the merge mechanism find the base itself. + # "bases" only record "special" merge bases that cannot be + # calculated from changelog DAG (i.e. isancestor(p, np) is False). + # For example: + # + # B' # rebase -s B -d D, when B was rebased to B'. dest for C + # | C # is B', but merge base for C is B, instead of + # D | # changelog.ancestor(C, B') == A. If changelog DAG and + # | B # "state" edges are merged (so there will be an edge from + # |/ # B to B'), the merge base is still ancestor(C, B') in + # A # the merged graph. + # + # Also see https://bz.mercurial-scm.org/show_bug.cgi?id=1950#c8 + # which uses "virtual null merge" to explain this situation. + if isancestor(p, np): + bases[i] = nullrev + + # If one parent becomes an ancestor of the other, drop the ancestor + for j, x in enumerate(newps[:i]): + if x == nullrev: + continue + if isancestor(np, x): + np = nullrev + elif isancestor(x, np): + newps[j] = np + np = nullrev + bases[j], bases[i] = bases[i], bases[j] + + newps[i] = np + + # "rebasenode" updates to new p1, and the old p1 will be used as merge + # base. If only p2 changes, merging using unchanged p1 as merge base is + # suboptimal. Therefore swap parents to make the merge sane. + if newps[1] != nullrev and oldps[0] == newps[0]: + assert len(newps) == 2 and len(oldps) == 2 + newps.reverse() + bases.reverse() + + # No parent change might be an error because we fail to make rev a + # descendent of requested dest. This can happen, for example: + # + # C # rebase -r C -d D + # /| # None of A and B will be changed to D and rebase fails. + # A B D + if set(newps) == set(oldps) and dest not in newps: + # The error message is for compatibility. It's a bit misleading + # since rebase is not supposed to add new parents. + raise error.Abort(_('cannot use revision %d as base, ' + 'result would have 3 parents') % rev) + + repo.ui.debug(" future parents are %d and %d\n" % tuple(newps)) + + # "rebasenode" updates to new p1, use the corresponding merge base. + if bases[0] != nullrev: + base = bases[0] + else: base = None - elif not repo[rev].p2(): - # Case (2) detaching the node with a single parent, use this parent - base = repo[rev].p1().rev() - else: - # Assuming there is a p1, this is the case where there also is a p2. - # We are thus rebasing a merge and need to pick the right merge base. - # - # Imagine we have: - # - M: current rebase revision in this step - # - A: one parent of M - # - B: other parent of M - # - D: destination of this merge step (p1 var) - # - # Consider the case where D is a descendant of A or B and the other is - # 'outside'. In this case, the right merge base is the D ancestor. - # - # An informal proof, assuming A is 'outside' and B is the D ancestor: - # - # If we pick B as the base, the merge involves: - # - changes from B to M (actual changeset payload) - # - changes from B to D (induced by rebase) as D is a rebased - # version of B) - # Which exactly represent the rebase operation. - # - # If we pick A as the base, the merge involves: - # - changes from A to M (actual changeset payload) - # - changes from A to D (with include changes between unrelated A and B - # plus changes induced by rebase) - # Which does not represent anything sensible and creates a lot of - # conflicts. A is thus not the right choice - B is. - # - # Note: The base found in this 'proof' is only correct in the specified - # case. This base does not make sense if is not D a descendant of A or B - # or if the other is not parent 'outside' (especially not if the other - # parent has been rebased). The current implementation does not - # make it feasible to consider different cases separately. In these - # other cases we currently just leave it to the user to correctly - # resolve an impossible merge using a wrong ancestor. - # - # xx, p1 could be -4, and both parents could probably be -4... - for p in repo[rev].parents(): - if state.get(p.rev()) == p1: - base = p.rev() - break - else: # fallback when base not found - base = None + + # Check if the merge will contain unwanted changes. That may happen if + # there are multiple special (non-changelog ancestor) merge bases, which + # cannot be handled well by the 3-way merge algorithm. For example: + # + # F + # /| + # D E # "rebase -r D+E+F -d Z", when rebasing F, if "D" was chosen + # | | # as merge base, the difference between D and F will include + # B C # C, so the rebased F will contain C surprisingly. If "E" was + # |/ # chosen, the rebased F will contain B. + # A Z + # + # But our merge base candidates (D and E in above case) could still be + # better than the default (ancestor(F, Z) == null). Therefore still + # pick one (so choose p1 above). + if sum(1 for b in bases if b != nullrev) > 1: + assert base is not None - # Raise because this function is called wrong (see issue 4106) - raise AssertionError('no base found to rebase on ' - '(defineparents called wrong)') - return rp1 or p1, p2, base + # Revisions in the side (not chosen as merge base) branch that might + # contain "surprising" contents + siderevs = list(repo.revs('((%ld-%d) %% (%d+%d))', + bases, base, base, dest)) + + # If those revisions are covered by rebaseset, the result is good. + # A merge in rebaseset would be considered to cover its ancestors. + if siderevs: + rebaseset = [r for r, d in state.items() if d > 0] + merges = [r for r in rebaseset if cl.parentrevs(r)[1] != nullrev] + unwantedrevs = list(repo.revs('%ld - (::%ld) - %ld', + siderevs, merges, rebaseset)) + + # For revs not covered, it is worth a warning. + if unwantedrevs: + repo.ui.warn( + _('warning: rebasing %d:%s may include unwanted changes ' + 'from %s\n') + % (rev, repo[rev], ', '.join('%d:%s' % (r, repo[r]) + for r in unwantedrevs))) + + return newps[0], newps[1], base def isagitpatch(repo, patchname): 'Return true if the given patch is in git format'
--- a/tests/test-rebase-brute-force.t Sat Aug 12 21:40:48 2017 -0700 +++ b/tests/test-rebase-brute-force.t Thu Aug 10 21:30:31 2017 -0700 @@ -31,8 +31,8 @@ AD: A':Z D':Z BD: B':Z D':B' ABD: A':Z B':Z D':B' - CD: CRASH: revlog index out of range - ACD: A':Z C':A'A' D':Z + CD: ABORT: cannot use revision 3 as base, result would have 3 parents + ACD: A':Z C':A'B D':Z BCD: B':Z C':B'A D':B' ABCD: A':Z B':Z C':A'B' D':B' @@ -52,4 +52,4 @@ C: ABORT: cannot use revision 3 as base, result would have 3 parents BC: B':Z C':B'A AC: - BAC: ABORT: nothing to merge + BAC: B':Z C':B'A
--- a/tests/test-rebase-newancestor.t Sat Aug 12 21:40:48 2017 -0700 +++ b/tests/test-rebase-newancestor.t Thu Aug 10 21:30:31 2017 -0700 @@ -3,7 +3,7 @@ > usegeneraldelta=yes > [extensions] > rebase= - > + > drawdag=$TESTDIR/drawdag.py > [alias] > tglog = log -G --template "{rev}: '{desc}' {branches}\n" > EOF @@ -334,3 +334,101 @@ |/ o 0: 'common' +Due to the limitation of 3-way merge algorithm (1 merge base), rebasing a merge +may include unwanted content: + + $ hg init $TESTTMP/dual-merge-base1 + $ cd $TESTTMP/dual-merge-base1 + $ hg debugdrawdag <<'EOS' + > F + > /| + > D E + > | | + > B C + > |/ + > A Z + > |/ + > R + > EOS + $ hg rebase -r D+E+F -d Z + rebasing 5:5f2c926dfecf "D" (D) + rebasing 6:b296604d9846 "E" (E) + rebasing 7:caa9781e507d "F" (F tip) + warning: rebasing 7:caa9781e507d may include unwanted changes from 4:d6003a550c2c + saved backup bundle to $TESTTMP/dual-merge-base1/.hg/strip-backup/b296604d9846-0516f6d2-rebase.hg (glob) + $ hg log -r 4 -T '{files}\n' + C + $ hg manifest -r 'desc(F)' + C + D + E + R + Z + +The warning does not get printed if there is no unwanted change detected: + + $ hg init $TESTTMP/dual-merge-base2 + $ cd $TESTTMP/dual-merge-base2 + $ hg debugdrawdag <<'EOS' + > D + > /| + > B C + > |/ + > A Z + > |/ + > R + > EOS + $ hg rebase -r B+C+D -d Z + rebasing 3:c1e6b162678d "B" (B) + rebasing 4:d6003a550c2c "C" (C) + rebasing 5:c8f78076273e "D" (D tip) + saved backup bundle to $TESTTMP/dual-merge-base2/.hg/strip-backup/d6003a550c2c-6f1424b6-rebase.hg (glob) + $ hg manifest -r 'desc(D)' + B + C + R + Z + +The merge base could be different from old p1 (changed parent becomes new p1): + + $ hg init $TESTTMP/chosen-merge-base1 + $ cd $TESTTMP/chosen-merge-base1 + $ hg debugdrawdag <<'EOS' + > F + > /| + > D E + > | | + > B C Z + > EOS + $ hg rebase -r D+F -d Z + rebasing 3:004dc1679908 "D" (D) + rebasing 5:4be4cbf6f206 "F" (F tip) + saved backup bundle to $TESTTMP/chosen-merge-base1/.hg/strip-backup/004dc1679908-06a66a3c-rebase.hg (glob) + $ hg manifest -r 'desc(F)' + C + D + E + Z + $ hg log -r `hg log -r 'desc(F)' -T '{p1node}'` -T '{desc}\n' + D + + $ hg init $TESTTMP/chosen-merge-base2 + $ cd $TESTTMP/chosen-merge-base2 + $ hg debugdrawdag <<'EOS' + > F + > /| + > D E + > | | + > B C Z + > EOS + $ hg rebase -r E+F -d Z + rebasing 4:974e4943c210 "E" (E) + rebasing 5:4be4cbf6f206 "F" (F tip) + saved backup bundle to $TESTTMP/chosen-merge-base2/.hg/strip-backup/974e4943c210-b2874da5-rebase.hg (glob) + $ hg manifest -r 'desc(F)' + B + D + E + Z + $ hg log -r `hg log -r 'desc(F)' -T '{p1node}'` -T '{desc}\n' + E
--- a/tests/test-rebase-obsolete.t Sat Aug 12 21:40:48 2017 -0700 +++ b/tests/test-rebase-obsolete.t Thu Aug 10 21:30:31 2017 -0700 @@ -488,9 +488,14 @@ > A > EOF -BROKEN: This raises an exception - $ hg rebase -d G -r 'B + D + F' 2>&1 | grep '^AssertionError' - AssertionError: no base found to rebase on (defineparents called wrong) + $ hg rebase -d G -r 'B + D + F' + rebasing 1:112478962961 "B" (B) + rebasing 2:b18e25de2cf5 "D" (D) + not rebasing ignored 4:26805aba1e60 "C" (C) + not rebasing ignored 5:4b61ff5c62e2 "E" (E) + rebasing 6:f15c3adaf214 "F" (F tip) + abort: cannot use revision 6 as base, result would have 3 parents + [255] $ cd .. @@ -962,17 +967,19 @@ > A > EOF -BROKEN: Raises an exception - $ hg rebase -d B -s E 2>&1 | grep AssertionError: - AssertionError: no base found to rebase on (defineparents called wrong) + $ hg rebase -d B -s E + note: not rebasing 3:7fb047a69f22 "E" (E), already in destination as 1:112478962961 "B" + rebasing 4:66f1a38021c9 "F" (F tip) $ hg log -G - o 4:66f1a38021c9 F + o 5:aae1787dacee F |\ - | x 3:7fb047a69f22 E - | | - o | 2:b18e25de2cf5 D - |/ - | o 1:112478962961 B + | | x 4:66f1a38021c9 F + | |/| + | | x 3:7fb047a69f22 E + | | | + | o | 2:b18e25de2cf5 D + | |/ + o / 1:112478962961 B |/ o 0:426bada5c675 A @@ -994,19 +1001,19 @@ $ hg rebase -d C -s D note: not rebasing 2:b18e25de2cf5 "D" (D), already in destination as 1:112478962961 "B" rebasing 5:66f1a38021c9 "F" (F tip) -BROKEN: not rebased on top of requested destination (C) + $ hg log -G - o 6:50e9d60b99c6 F + o 6:0913febf6439 F |\ - | | x 5:66f1a38021c9 F - | |/| - +-----o 4:26805aba1e60 C + +---x 5:66f1a38021c9 F + | | | + | o | 4:26805aba1e60 C | | | - | o | 3:7fb047a69f22 E + o | | 3:7fb047a69f22 E | | | - | | x 2:b18e25de2cf5 D - | |/ - o | 1:112478962961 B + +---x 2:b18e25de2cf5 D + | | + | o 1:112478962961 B |/ o 0:426bada5c675 A @@ -1025,19 +1032,21 @@ > A > EOF -BROKEN: Raises an exception - $ hg rebase -d C -s E 2>&1 | grep AssertionError: - AssertionError: no base found to rebase on (defineparents called wrong) + $ hg rebase -d C -s E + note: not rebasing 3:7fb047a69f22 "E" (E), already in destination as 1:112478962961 "B" + rebasing 5:66f1a38021c9 "F" (F tip) $ hg log -G - o 5:66f1a38021c9 F + o 6:c6ab0cc6d220 F |\ - | | o 4:26805aba1e60 C + +---x 5:66f1a38021c9 F | | | - | x | 3:7fb047a69f22 E + | o | 4:26805aba1e60 C | | | - o | | 2:b18e25de2cf5 D - |/ / - | o 1:112478962961 B + | | x 3:7fb047a69f22 E + | | | + o---+ 2:b18e25de2cf5 D + / / + o / 1:112478962961 B |/ o 0:426bada5c675 A @@ -1060,6 +1069,7 @@ rebasing 2:b18e25de2cf5 "D" (D) note: not rebasing 3:7fb047a69f22 "E" (E), already in destination as 1:112478962961 "B" rebasing 5:66f1a38021c9 "F" (F tip) + warning: rebasing 5:66f1a38021c9 may include unwanted changes from 3:7fb047a69f22 $ hg log -G o 7:9ed45af61fa0 F | @@ -1096,13 +1106,13 @@ note: not rebasing 2:b18e25de2cf5 "D" (D), already in destination as 1:112478962961 "B" rebasing 3:7fb047a69f22 "E" (E) rebasing 5:66f1a38021c9 "F" (F tip) -BROKEN: This should have resulted in a rebased F with one parent, just like in -the test case above + warning: rebasing 5:66f1a38021c9 may include unwanted changes from 2:b18e25de2cf5 + $ hg log -G - o 7:c1e6f26e339d F - |\ - | o 6:533690786a86 E - |/ + o 7:502540f44880 F + | + o 6:533690786a86 E + | | x 5:66f1a38021c9 F | |\ o | | 4:26805aba1e60 C