revset: optimize "draft() & ::x" pattern
authorJun Wu <quark@fb.com>
Mon, 28 Aug 2017 14:49:00 -0700
changeset 34065 c6c8a52e28c9
parent 34064 8b659b7388c0
child 34066 871a58b5f428
revset: optimize "draft() & ::x" pattern The `draft() & ::x` type query could be common for selecting one or more draft feature branches being worked on. Before this patch, `::x` may travel through the changelog DAG for a long distance until it gets a smaller revision number than `min(draft())`. It could be very slow on long changelog with distant (in terms of revision numbers) drafts. This patch adds a fast path for this situation, and will stop traveling the changelog DAG once `::x` hits a non-draft revision. The fast path also works for `secret()` and `not public()`. To measure the performance difference, I used drawdag to create a repo that emulates distant drafts: DRAFT4 | DRAFT3 # draft / PUBLIC9999 # public | PUBLIC9998 | . DRAFT2 . | . DRAFT1 # draft | / PUBLIC0001 # public And measured the performance using the repo: (BEFORE) $ hg perfrevset 'draft() & ::(DRAFT2+DRAFT4)' ! wall 0.017132 comb 0.010000 user 0.010000 sys 0.000000 (best of 156) $ hg perfrevset 'draft() & ::(all())' ! wall 0.024221 comb 0.030000 user 0.030000 sys 0.000000 (best of 113) (AFTER) $ hg perfrevset 'draft() & ::(DRAFT2+DRAFT4)' ! wall 0.000243 comb 0.000000 user 0.000000 sys 0.000000 (best of 9303) $ hg perfrevset 'draft() & ::(all())' ! wall 0.004319 comb 0.000000 user 0.000000 sys 0.000000 (best of 655) Differential Revision: https://phab.mercurial-scm.org/D441
mercurial/dagop.py
mercurial/revset.py
mercurial/revsetlang.py
tests/test-revset.t
--- a/mercurial/dagop.py	Fri Sep 01 12:13:17 2017 -0700
+++ b/mercurial/dagop.py	Mon Aug 28 14:49:00 2017 -0700
@@ -75,27 +75,49 @@
                 if prev != node.nullrev:
                     heapq.heappush(pendingheap, (heapsign * prev, pdepth))
 
-def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth):
+def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
     if followfirst:
         cut = 1
     else:
         cut = None
     cl = repo.changelog
-    def pfunc(rev):
+    def plainpfunc(rev):
         try:
             return cl.parentrevs(rev)[:cut]
         except error.WdirUnsupported:
             return (pctx.rev() for pctx in repo[rev].parents()[:cut])
+    if cutfunc is None:
+        pfunc = plainpfunc
+    else:
+        pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
+        revs = revs.filter(lambda rev: not cutfunc(rev))
     return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
 
-def revancestors(repo, revs, followfirst, startdepth=None, stopdepth=None):
+def revancestors(repo, revs, followfirst=False, startdepth=None,
+                 stopdepth=None, cutfunc=None):
     """Like revlog.ancestors(), but supports additional options, includes
     the given revs themselves, and returns a smartset
 
     Scan ends at the stopdepth (exlusive) if specified. Revisions found
     earlier than the startdepth are omitted.
+
+    If cutfunc is provided, it will be used to cut the traversal of the DAG.
+    When cutfunc(X) returns True, the DAG traversal stops - revision X and
+    X's ancestors in the traversal path will be skipped. This could be an
+    optimization sometimes.
+
+    Note: if Y is an ancestor of X, cutfunc(X) returning True does not
+    necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
+    return True in this case. For example,
+
+        D     # revancestors(repo, D, cutfunc=lambda rev: rev == B)
+        |\    # will include "A", because the path D -> C -> A was not cut.
+        B C   # If "B" gets cut, "A" might want to be cut too.
+        |/
+        A
     """
-    gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth)
+    gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
+                           cutfunc)
     return generatorset(gen, iterasc=False)
 
 def _genrevdescendants(repo, revs, followfirst):
--- a/mercurial/revset.py	Fri Sep 01 12:13:17 2017 -0700
+++ b/mercurial/revset.py	Mon Aug 28 14:49:00 2017 -0700
@@ -1577,6 +1577,37 @@
     getargs(x, 0, 0, "_notpublic takes no arguments")
     return _phase(repo, subset, phases.draft, phases.secret)
 
+# for internal use
+@predicate('_phaseandancestors(phasename, set)', safe=True)
+def _phaseandancestors(repo, subset, x):
+    # equivalent to (phasename() & ancestors(set)) but more efficient
+    # phasename could be one of 'draft', 'secret', or '_notpublic'
+    args = getargs(x, 2, 2, "_phaseandancestors requires two arguments")
+    phasename = getsymbol(args[0])
+    s = getset(repo, fullreposet(repo), args[1])
+
+    draft = phases.draft
+    secret = phases.secret
+    phasenamemap = {
+        '_notpublic': draft,
+        'draft': draft, # follow secret's ancestors
+        'secret': secret,
+    }
+    if phasename not in phasenamemap:
+        raise error.ParseError('%r is not a valid phasename' % phasename)
+
+    minimalphase = phasenamemap[phasename]
+    getphase = repo._phasecache.phase
+
+    def cutfunc(rev):
+        return getphase(repo, rev) < minimalphase
+
+    revs = dagop.revancestors(repo, s, cutfunc=cutfunc)
+
+    if phasename == 'draft': # need to remove secret changesets
+        revs = revs.filter(lambda r: getphase(repo, r) == draft)
+    return subset & revs
+
 @predicate('public()', safe=True)
 def public(repo, subset, x):
     """Changeset in public phase."""
--- a/mercurial/revsetlang.py	Fri Sep 01 12:13:17 2017 -0700
+++ b/mercurial/revsetlang.py	Mon Aug 28 14:49:00 2017 -0700
@@ -369,6 +369,11 @@
         wb, tb = _optimize(x[2], True)
         w = min(wa, wb)
 
+        # (draft/secret/_notpublic() & ::x) have a fast path
+        m = _match('_() & ancestors(_)', ('and', ta, tb))
+        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
+            return w, _build('_phaseandancestors(_, _)', m[1], m[2])
+
         # (::x and not ::y)/(not ::y and ::x) have a fast path
         m = _matchonly(ta, tb) or _matchonly(tb, ta)
         if m:
--- a/tests/test-revset.t	Fri Sep 01 12:13:17 2017 -0700
+++ b/tests/test-revset.t	Mon Aug 28 14:49:00 2017 -0700
@@ -4321,3 +4321,147 @@
 
   $ hg log -r 'successors(B+A)-contentdivergent()-obsolete()' -T '{desc}\n'
   Z
+
+Test `draft() & ::x` optimization
+
+  $ hg init $TESTTMP/repo2
+  $ cd $TESTTMP/repo2
+  $ hg debugdrawdag <<'EOS'
+  >   P5 S1
+  >    |  |
+  > S2 | D3
+  >   \|/
+  >   P4
+  >    |
+  >   P3 D2
+  >    |  |
+  >   P2 D1
+  >    |/
+  >   P1
+  >    |
+  >   P0
+  > EOS
+  $ hg phase --public -r P5
+  $ hg phase --force --secret -r S1+S2
+  $ hg log -G -T '{rev} {desc} {phase}' -r 'sort(all(), topo, topo.firstbranch=P5)'
+  o  8 P5 public
+  |
+  | o  10 S1 secret
+  | |
+  | o  7 D3 draft
+  |/
+  | o  9 S2 secret
+  |/
+  o  6 P4 public
+  |
+  o  5 P3 public
+  |
+  o  3 P2 public
+  |
+  | o  4 D2 draft
+  | |
+  | o  2 D1 draft
+  |/
+  o  1 P1 public
+  |
+  o  0 P0 public
+  
+  $ hg debugrevspec --verify -p analyzed -p optimized 'draft() & ::(((S1+D1+P5)-D3)+S2)'
+  * analyzed:
+  (and
+    (func
+      ('symbol', 'draft')
+      None)
+    (func
+      ('symbol', 'ancestors')
+      (or
+        (list
+          (and
+            (or
+              (list
+                ('symbol', 'S1')
+                ('symbol', 'D1')
+                ('symbol', 'P5')))
+            (not
+              ('symbol', 'D3')))
+          ('symbol', 'S2')))))
+  * optimized:
+  (func
+    ('symbol', '_phaseandancestors')
+    (list
+      ('symbol', 'draft')
+      (or
+        (list
+          (difference
+            (func
+              ('symbol', '_list')
+              ('string', 'S1\x00D1\x00P5'))
+            ('symbol', 'D3'))
+          ('symbol', 'S2')))))
+  $ hg debugrevspec --verify -p analyzed -p optimized 'secret() & ::9'
+  * analyzed:
+  (and
+    (func
+      ('symbol', 'secret')
+      None)
+    (func
+      ('symbol', 'ancestors')
+      ('symbol', '9')))
+  * optimized:
+  (func
+    ('symbol', '_phaseandancestors')
+    (list
+      ('symbol', 'secret')
+      ('symbol', '9')))
+  $ hg debugrevspec --verify -p analyzed -p optimized '7 & ( (not public()) & ::(tag()) )'
+  * analyzed:
+  (and
+    ('symbol', '7')
+    (and
+      (not
+        (func
+          ('symbol', 'public')
+          None))
+      (func
+        ('symbol', 'ancestors')
+        (func
+          ('symbol', 'tag')
+          None))))
+  * optimized:
+  (and
+    ('symbol', '7')
+    (func
+      ('symbol', '_phaseandancestors')
+      (list
+        ('symbol', '_notpublic')
+        (func
+          ('symbol', 'tag')
+          None))))
+  $ hg debugrevspec --verify -p optimized '(not public()) & ancestors(S1+D2+P5, 1)'
+  * optimized:
+  (and
+    (func
+      ('symbol', '_notpublic')
+      None)
+    (func
+      ('symbol', 'ancestors')
+      (list
+        (func
+          ('symbol', '_list')
+          ('string', 'S1\x00D2\x00P5'))
+        ('symbol', '1'))))
+  $ hg debugrevspec --verify -p optimized '(not public()) & ancestors(S1+D2+P5, depth=1)'
+  * optimized:
+  (and
+    (func
+      ('symbol', '_notpublic')
+      None)
+    (func
+      ('symbol', 'ancestors')
+      (list
+        (func
+          ('symbol', '_list')
+          ('string', 'S1\x00D2\x00P5'))
+        (keyvalue
+          ('symbol', 'depth')
+          ('symbol', '1')))))