ancestor: a new algorithm that is faster for nodes near tip
author Bryan O'Sullivan <bryano@fb.com>
Tue, 16 Apr 2013 10:08:18 -0700
changeset 18986 2f7186400a07
parent 18982 43cb150e74f9
child 18987 3605d4e7e618
ancestor: a new algorithm that is faster for nodes near tip

Instead of walking all the way to the root of the DAG, we generate a set of candidate GCA revs, then figure out which ones will win the race to the root (usually without needing to traverse all the way to the root).

In the common case of nodes that are close to each other in both revision number and topology, this is usually a big win: it makes "hg --time debugancestors" up to 9 times faster than the more general ancestor function when measured on heads of the linux-2.6 hg repo.

Victory is not assured, however. The older function can still win by a large margin if one node is much closer to the root than the other, or by a much smaller amount if one is an ancestor of the other.

For now, we've also got a small paranoid harness function that calls both ancestor functions on every input and ensures that they give equivalent answers.

Even without the checker function, the old ancestor function needs to stay alive for the time being, as its generality is used by context.filectx.merge.
mercurial/ancestor.py
mercurial/context.py
mercurial/revlog.py
--- a/mercurial/ancestor.py	Mon Apr 15 01:59:11 2013 +0200
+++ b/mercurial/ancestor.py	Tue Apr 16 10:08:18 2013 -0700
@@ -5,10 +5,132 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import heapq, util
+import error, heapq, util
 from node import nullrev
 
-def ancestor(a, b, pfunc):
+def ancestors(pfunc, *orignodes):
+    """
+    Returns the set of common ancestors of the given revs that are furthest
+    from a root (as measured by longest path); empty if any rev is nullrev.
+
+    pfunc must return a list of parent vertices for a given vertex.
+    """
+    if not isinstance(orignodes, set):
+        orignodes = set(orignodes)
+    if nullrev in orignodes:
+        return set()
+    if len(orignodes) <= 1:
+        return orignodes
+
+    def candidates(nodes):
+        allseen = (1 << len(nodes)) - 1
+        seen = [0] * (max(nodes) + 1)
+        for i, n in enumerate(nodes):
+            seen[n] = 1 << i
+        poison = 1 << (i + 1)
+
+        gca = set()
+        interesting = left = len(nodes)
+        nv = len(seen) - 1
+        while nv >= 0 and interesting:
+            v = nv
+            nv -= 1
+            if not seen[v]:
+                continue
+            sv = seen[v]
+            if sv < poison:
+                interesting -= 1
+                if sv == allseen:
+                    gca.add(v)
+                    sv |= poison
+                    if v in nodes:
+                        left -= 1
+                        if left <= 1:
+                            # history is linear
+                            return set([v])
+            if sv < poison:
+                for p in pfunc(v):
+                    sp = seen[p]
+                    if p == nullrev:
+                        continue
+                    if sp == 0:
+                        seen[p] = sv
+                        interesting += 1
+                    elif sp != sv:
+                        seen[p] |= sv
+            else:
+                for p in pfunc(v):
+                    if p == nullrev:
+                        continue
+                    sp = seen[p]
+                    if sp and sp < poison:
+                        interesting -= 1
+                    seen[p] = sv
+        return gca
+
+    def deepest(nodes):
+        interesting = {}
+        count = max(nodes) + 1
+        depth = [0] * count
+        seen = [0] * count
+        mapping = []
+        for (i, n) in enumerate(sorted(nodes)):
+            depth[n] = 1
+            b = 1 << i
+            seen[n] = b
+            interesting[b] = 1
+            mapping.append((b, n))
+        nv = count - 1
+        while nv >= 0 and len(interesting) > 1:
+            v = nv
+            nv -= 1
+            dv = depth[v]
+            if dv == 0:
+                continue
+            sv = seen[v]
+            for p in pfunc(v):
+                if p == nullrev:
+                    continue
+                dp = depth[p]
+                nsp = sp = seen[p]
+                if dp <= dv:
+                    depth[p] = dv + 1
+                    if sp != sv:
+                        interesting[sv] += 1
+                        nsp = seen[p] = sv
+                        if sp:
+                            interesting[sp] -= 1
+                            if interesting[sp] == 0:
+                                del interesting[sp]
+                elif dv == dp - 1:
+                    nsp = sp | sv
+                    if nsp == sp:
+                        continue
+                    seen[p] = nsp
+                    interesting.setdefault(nsp, 0)
+                    interesting[nsp] += 1
+                    interesting[sp] -= 1
+                    if interesting[sp] == 0:
+                        del interesting[sp]
+            interesting[sv] -= 1
+            if interesting[sv] == 0:
+                del interesting[sv]
+
+        if len(interesting) != 1:
+            return set()
+
+        k = 0
+        for i in interesting:
+            k |= i
+        return set(n for (i, n) in mapping if k & i)
+
+    gca = candidates(orignodes)
+
+    if len(gca) <= 1:
+        return gca
+    return deepest(gca)
+
+def genericancestor(a, b, pfunc):
     """
     Returns the common ancestor of a and b that is furthest from a
     root (as measured by longest path) or None if no ancestor is
@@ -30,7 +152,7 @@
     depth = {}
     while visit:
         vertex = visit[-1]
-        pl = pfunc(vertex)
+        pl = [p for p in pfunc(vertex) if p != nullrev]
         parentcache[vertex] = pl
         if not pl:
             depth[vertex] = 0
@@ -91,6 +213,51 @@
     except StopIteration:
         return None
 
+def finddepths(nodes, pfunc):
+    """Return a dict mapping nodes and all of their ancestors (found via
+    pfunc) to their depth: the longest path to a vertex without parents."""
+    depth = {}
+    stack = list(nodes)
+    while stack:
+        cur = stack[-1]
+        parents = pfunc(cur)
+        if not parents or parents == [nullrev, nullrev]:
+            depth[cur] = 0
+            stack.pop()
+            continue
+        # push parents of unknown depth; cur is finished once none remain
+        stack.extend(p for p in parents if p != nullrev and p not in depth)
+        if stack[-1] != cur:
+            continue
+        known = [depth[p] for p in parents if p != nullrev]
+        depth[cur] = 0
+        if known:
+            depth[cur] = max(known) + 1
+        stack.pop()
+    return depth
+
+def ancestor(a, b, pfunc):
+    """Return a common ancestor of a and b furthest from a root, or None.
+
+    Paranoid harness: runs both ancestors() and genericancestor() and raises
+    error.RepoError if they disagree on a result's existence or depth.
+    """
+    xs = ancestors(pfunc, a, b)
+    y = genericancestor(a, b, pfunc)
+    if y == -1:
+        y = None
+    if bool(xs) != (y is not None):
+        print xs, y
+        raise error.RepoError('ancestors disagree on whether a gca exists')
+    if y is None or y in xs:
+        return y
+    # the answers differ, which is fine only if all lie at the same depth
+    xds = finddepths(xs, pfunc)
+    yd = finddepths([y], pfunc)[y]
+    if [x for x in xs if xds[x] != yd]:
+        raise error.RepoError('ancestor depths do not match')
+    return xs.pop()
+
 def missingancestors(revs, bases, pfunc):
     """Return all the ancestors of revs that are not ancestors of bases.
 
--- a/mercurial/context.py	Mon Apr 15 01:59:11 2013 +0200
+++ b/mercurial/context.py	Tue Apr 16 10:08:18 2013 -0700
@@ -756,7 +756,7 @@
             return pl
 
         a, b = (self._path, self._filenode), (fc2._path, fc2._filenode)
-        v = ancestor.ancestor(a, b, parents)
+        v = ancestor.genericancestor(a, b, parents)
         if v:
             f, n = v
             return filectx(self._repo, f, fileid=n, filelog=flcache[f])
--- a/mercurial/revlog.py	Mon Apr 15 01:59:11 2013 +0200
+++ b/mercurial/revlog.py	Tue Apr 16 10:08:18 2013 -0700
@@ -711,10 +711,7 @@
         if self.descendant(start, end):
             return self.node(start)
 
-        def parents(rev):
-            return [p for p in self.parentrevs(rev) if p != nullrev]
-
-        c = ancestor.ancestor(a, b, parents)
+        c = ancestor.ancestor(a, b, self.parentrevs)
         if c is None:
             return nullid