revset: optimize "draft() & ::x" pattern
The `draft() & ::x` type query could be common for selecting one or more
draft feature branches being worked on.
Before this patch, `::x` may travel through the changelog DAG for a long
distance until it gets a smaller revision number than `min(draft())`. It
could be very slow on long changelog with distant (in terms of revision
numbers) drafts.
This patch adds a fast path for this situation, and will stop traveling the
changelog DAG once `::x` hits a non-draft revision.
The fast path also works for `secret()` and `not public()`.
To measure the performance difference, I used drawdag to create a repo that
emulates distant drafts:
DRAFT4
|
DRAFT3 # draft
/
PUBLIC9999 # public
|
PUBLIC9998
|
. DRAFT2
. |
. DRAFT1 # draft
| /
PUBLIC0001 # public
And measured the performance using the repo:
(BEFORE)
$ hg perfrevset 'draft() & ::(DRAFT2+DRAFT4)'
! wall 0.017132 comb 0.010000 user 0.010000 sys 0.000000 (best of 156)
$ hg perfrevset 'draft() & ::(all())'
! wall 0.024221 comb 0.030000 user 0.030000 sys 0.000000 (best of 113)
(AFTER)
$ hg perfrevset 'draft() & ::(DRAFT2+DRAFT4)'
! wall 0.000243 comb 0.000000 user 0.000000 sys 0.000000 (best of 9303)
$ hg perfrevset 'draft() & ::(all())'
! wall 0.004319 comb 0.000000 user 0.000000 sys 0.000000 (best of 655)
Differential Revision: https://phab.mercurial-scm.org/D441
--- a/mercurial/dagop.py Fri Sep 01 12:13:17 2017 -0700
+++ b/mercurial/dagop.py Mon Aug 28 14:49:00 2017 -0700
@@ -75,27 +75,49 @@
if prev != node.nullrev:
heapq.heappush(pendingheap, (heapsign * prev, pdepth))
-def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth):
+def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
if followfirst:
cut = 1
else:
cut = None
cl = repo.changelog
- def pfunc(rev):
+ def plainpfunc(rev):
try:
return cl.parentrevs(rev)[:cut]
except error.WdirUnsupported:
return (pctx.rev() for pctx in repo[rev].parents()[:cut])
+ if cutfunc is None:
+ pfunc = plainpfunc
+ else:
+ pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
+ revs = revs.filter(lambda rev: not cutfunc(rev))
return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
-def revancestors(repo, revs, followfirst, startdepth=None, stopdepth=None):
+def revancestors(repo, revs, followfirst=False, startdepth=None,
+ stopdepth=None, cutfunc=None):
"""Like revlog.ancestors(), but supports additional options, includes
the given revs themselves, and returns a smartset
Scan ends at the stopdepth (exlusive) if specified. Revisions found
earlier than the startdepth are omitted.
+
+ If cutfunc is provided, it will be used to cut the traversal of the DAG.
+ When cutfunc(X) returns True, the DAG traversal stops - revision X and
+ X's ancestors in the traversal path will be skipped. This could be an
+ optimization sometimes.
+
+ Note: if Y is an ancestor of X, cutfunc(X) returning True does not
+ necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
+ return True in this case. For example,
+
+ D # revancestors(repo, D, cutfunc=lambda rev: rev == B)
+ |\ # will include "A", because the path D -> C -> A was not cut.
+ B C # If "B" gets cut, "A" might want to be cut too.
+ |/
+ A
"""
- gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth)
+ gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
+ cutfunc)
return generatorset(gen, iterasc=False)
def _genrevdescendants(repo, revs, followfirst):
--- a/mercurial/revset.py Fri Sep 01 12:13:17 2017 -0700
+++ b/mercurial/revset.py Mon Aug 28 14:49:00 2017 -0700
@@ -1577,6 +1577,37 @@
getargs(x, 0, 0, "_notpublic takes no arguments")
return _phase(repo, subset, phases.draft, phases.secret)
+# for internal use
+@predicate('_phaseandancestors(phasename, set)', safe=True)
+def _phaseandancestors(repo, subset, x):
+ # equivalent to (phasename() & ancestors(set)) but more efficient
+ # phasename could be one of 'draft', 'secret', or '_notpublic'
+ args = getargs(x, 2, 2, "_phaseandancestors requires two arguments")
+ phasename = getsymbol(args[0])
+ s = getset(repo, fullreposet(repo), args[1])
+
+ draft = phases.draft
+ secret = phases.secret
+ phasenamemap = {
+ '_notpublic': draft,
+ 'draft': draft, # follow secret's ancestors
+ 'secret': secret,
+ }
+ if phasename not in phasenamemap:
+ raise error.ParseError('%r is not a valid phasename' % phasename)
+
+ minimalphase = phasenamemap[phasename]
+ getphase = repo._phasecache.phase
+
+ def cutfunc(rev):
+ return getphase(repo, rev) < minimalphase
+
+ revs = dagop.revancestors(repo, s, cutfunc=cutfunc)
+
+ if phasename == 'draft': # need to remove secret changesets
+ revs = revs.filter(lambda r: getphase(repo, r) == draft)
+ return subset & revs
+
@predicate('public()', safe=True)
def public(repo, subset, x):
"""Changeset in public phase."""
--- a/mercurial/revsetlang.py Fri Sep 01 12:13:17 2017 -0700
+++ b/mercurial/revsetlang.py Mon Aug 28 14:49:00 2017 -0700
@@ -369,6 +369,11 @@
wb, tb = _optimize(x[2], True)
w = min(wa, wb)
+ # (draft/secret/_notpublic() & ::x) have a fast path
+ m = _match('_() & ancestors(_)', ('and', ta, tb))
+ if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
+ return w, _build('_phaseandancestors(_, _)', m[1], m[2])
+
# (::x and not ::y)/(not ::y and ::x) have a fast path
m = _matchonly(ta, tb) or _matchonly(tb, ta)
if m:
--- a/tests/test-revset.t Fri Sep 01 12:13:17 2017 -0700
+++ b/tests/test-revset.t Mon Aug 28 14:49:00 2017 -0700
@@ -4321,3 +4321,147 @@
$ hg log -r 'successors(B+A)-contentdivergent()-obsolete()' -T '{desc}\n'
Z
+
+Test `draft() & ::x` optimization
+
+ $ hg init $TESTTMP/repo2
+ $ cd $TESTTMP/repo2
+ $ hg debugdrawdag <<'EOS'
+ > P5 S1
+ > | |
+ > S2 | D3
+ > \|/
+ > P4
+ > |
+ > P3 D2
+ > | |
+ > P2 D1
+ > |/
+ > P1
+ > |
+ > P0
+ > EOS
+ $ hg phase --public -r P5
+ $ hg phase --force --secret -r S1+S2
+ $ hg log -G -T '{rev} {desc} {phase}' -r 'sort(all(), topo, topo.firstbranch=P5)'
+ o 8 P5 public
+ |
+ | o 10 S1 secret
+ | |
+ | o 7 D3 draft
+ |/
+ | o 9 S2 secret
+ |/
+ o 6 P4 public
+ |
+ o 5 P3 public
+ |
+ o 3 P2 public
+ |
+ | o 4 D2 draft
+ | |
+ | o 2 D1 draft
+ |/
+ o 1 P1 public
+ |
+ o 0 P0 public
+
+ $ hg debugrevspec --verify -p analyzed -p optimized 'draft() & ::(((S1+D1+P5)-D3)+S2)'
+ * analyzed:
+ (and
+ (func
+ ('symbol', 'draft')
+ None)
+ (func
+ ('symbol', 'ancestors')
+ (or
+ (list
+ (and
+ (or
+ (list
+ ('symbol', 'S1')
+ ('symbol', 'D1')
+ ('symbol', 'P5')))
+ (not
+ ('symbol', 'D3')))
+ ('symbol', 'S2')))))
+ * optimized:
+ (func
+ ('symbol', '_phaseandancestors')
+ (list
+ ('symbol', 'draft')
+ (or
+ (list
+ (difference
+ (func
+ ('symbol', '_list')
+ ('string', 'S1\x00D1\x00P5'))
+ ('symbol', 'D3'))
+ ('symbol', 'S2')))))
+ $ hg debugrevspec --verify -p analyzed -p optimized 'secret() & ::9'
+ * analyzed:
+ (and
+ (func
+ ('symbol', 'secret')
+ None)
+ (func
+ ('symbol', 'ancestors')
+ ('symbol', '9')))
+ * optimized:
+ (func
+ ('symbol', '_phaseandancestors')
+ (list
+ ('symbol', 'secret')
+ ('symbol', '9')))
+ $ hg debugrevspec --verify -p analyzed -p optimized '7 & ( (not public()) & ::(tag()) )'
+ * analyzed:
+ (and
+ ('symbol', '7')
+ (and
+ (not
+ (func
+ ('symbol', 'public')
+ None))
+ (func
+ ('symbol', 'ancestors')
+ (func
+ ('symbol', 'tag')
+ None))))
+ * optimized:
+ (and
+ ('symbol', '7')
+ (func
+ ('symbol', '_phaseandancestors')
+ (list
+ ('symbol', '_notpublic')
+ (func
+ ('symbol', 'tag')
+ None))))
+ $ hg debugrevspec --verify -p optimized '(not public()) & ancestors(S1+D2+P5, 1)'
+ * optimized:
+ (and
+ (func
+ ('symbol', '_notpublic')
+ None)
+ (func
+ ('symbol', 'ancestors')
+ (list
+ (func
+ ('symbol', '_list')
+ ('string', 'S1\x00D2\x00P5'))
+ ('symbol', '1'))))
+ $ hg debugrevspec --verify -p optimized '(not public()) & ancestors(S1+D2+P5, depth=1)'
+ * optimized:
+ (and
+ (func
+ ('symbol', '_notpublic')
+ None)
+ (func
+ ('symbol', 'ancestors')
+ (list
+ (func
+ ('symbol', '_list')
+ ('string', 'S1\x00D2\x00P5'))
+ (keyvalue
+ ('symbol', 'depth')
+ ('symbol', '1')))))