revset: optimize not public revset
author Laurent Charignon <lcharignon@fb.com>
Fri, 24 Apr 2015 14:30:30 -0700
changeset 25191 08d1ef09ed37
parent 25190 22438cfd11b5
child 25195 472a685a4961
revset: optimize not public revset This patch speeds up the computation of the not public() changesets and, incidentally, speeds up the computation of the divergents() changesets on our big repo by 100x, from 50% to 0.5% of the time spent in smartlog with evolve. In this patch we optimize not public() to _notpublic() (a new revset) and use the work on phaseset (from the previous commit) to be able to compute _notpublic() quickly. We use a non-lazy approach, on the assumption that the number of not-public changesets will not be of the same order of magnitude as the repo size. Adopting a lazy approach gives a speedup of only 5x (vs 100x) because of the overhead of the code for lazy generation.
mercurial/revset.py
--- a/mercurial/revset.py	Wed Apr 01 11:17:17 2015 -0700
+++ b/mercurial/revset.py	Fri Apr 24 14:30:30 2015 -0700
@@ -1478,6 +1478,22 @@
     except error.RepoLookupError:
         return baseset()
 
+def _notpublic(repo, subset, x):
+    """``_notpublic()``
+    Changeset not in public phase."""
+    # i18n: "_notpublic" is a keyword
+    getargs(x, 0, 0, _("_notpublic takes no arguments"))
+    if repo._phasecache._phasesets:  # fast path when phase sets exist
+        s = set()
+        for u in repo._phasecache._phasesets[1:]:  # skip first entry
+            s.update(u)
+        return subset & s
+    else:  # slow path: check the phase of each revision in subset
+        phase = repo._phasecache.phase
+        target = phases.public
+        condition = lambda r: phase(repo, r) != target
+        return subset.filter(condition, cache=False)
+
 def public(repo, subset, x):
     """``public()``
     Changeset in public phase."""
@@ -1984,6 +2000,7 @@
     "parents": parents,
     "present": present,
     "public": public,
+    "_notpublic": _notpublic,
     "remote": remote,
     "removes": removes,
     "rev": rev,
@@ -2058,6 +2075,7 @@
     "parents",
     "present",
     "public",
+    "_notpublic",
     "remote",
     "removes",
     "rev",
@@ -2149,8 +2167,14 @@
             wb, wa = wa, wb
         return max(wa, wb), (op, ta, tb)
     elif op == 'not':
-        o = optimize(x[1], not small)
-        return o[0], (op, o[1])
+        # Optimize not public() to _notpublic() because we have a fast version
+        if x[1] == ('func', ('symbol', 'public'), None):
+            newsym =  ('func', ('symbol', '_notpublic'), None)
+            o = optimize(newsym, not small)
+            return o[0], o[1]
+        else:
+            o = optimize(x[1], not small)
+            return o[0], (op, o[1])
     elif op == 'parentpost':
         o = optimize(x[1], small)
         return o[0], (op, o[1])