changeset 49447:6dbe74669eba

sort-revset: introduce a `random` variant This new `sort` variant allows shuffling any revset. It also allows randomly picking an element using `first`.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 25 Jul 2022 05:30:06 +0200
parents cd21f2b4226f
children cfff73cab721
files mercurial/revset.py tests/test-revset.t
diffstat 2 files changed, 60 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revset.py	Thu Aug 25 05:12:25 2022 +0200
+++ b/mercurial/revset.py	Mon Jul 25 05:30:06 2022 +0200
@@ -7,7 +7,10 @@
 
 
 import binascii
+import functools
+import random
 import re
+import sys
 
 from .i18n import _
 from .pycompat import getattr
@@ -2347,6 +2350,15 @@
     return subset & s.filter(filter, condrepr=b'<roots>')
 
 
+MAXINT = sys.maxsize
+MININT = -MAXINT - 1
+
+
+def pick_random(c, gen=random):
+    # exists as its own function so that a seeded generator can be passed as `gen`
+    return gen.randint(MININT, MAXINT)
+
+
 _sortkeyfuncs = {
     b'rev': scmutil.intrev,
     b'branch': lambda c: c.branch(),
@@ -2355,12 +2367,17 @@
     b'author': lambda c: c.user(),
     b'date': lambda c: c.date()[0],
     b'node': scmutil.binnode,
+    b'random': pick_random,
 }
 
 
 def _getsortargs(x):
     """Parse sort options into (set, [(key, reverse)], opts)"""
-    args = getargsdict(x, b'sort', b'set keys topo.firstbranch')
+    args = getargsdict(
+        x,
+        b'sort',
+        b'set keys topo.firstbranch random.seed',
+    )
     if b'set' not in args:
         # i18n: "sort" is a keyword
         raise error.ParseError(_(b'sort requires one or two arguments'))
@@ -2400,6 +2417,20 @@
                 )
             )
 
+    if b'random.seed' in args:
+        if any(k == b'random' for k, reverse in keyflags):
+            s = args[b'random.seed']
+            seed = getstring(s, _(b"random.seed must be a string"))
+            opts[b'random.seed'] = seed
+        else:
+            # i18n: "random" and "random.seed" are keywords
+            raise error.ParseError(
+                _(
+                    b'random.seed can only be used '
+                    b'when using the random sort key'
+                )
+            )
+
     return args[b'set'], keyflags, opts
 
 
@@ -2419,11 +2450,14 @@
     - ``date`` for the commit date
     - ``topo`` for a reverse topographical sort
     - ``node`` the nodeid of the revision
+    - ``random`` randomly shuffle revisions
 
     The ``topo`` sort order cannot be combined with other sort keys. This sort
     takes one optional argument, ``topo.firstbranch``, which takes a revset that
     specifies what topographical branches to prioritize in the sort.
 
+    The ``random`` sort takes one optional ``random.seed`` argument to control
+    the pseudo-randomness of the result.
     """
     s, keyflags, opts = _getsortargs(x)
     revs = getset(repo, subset, s, order)
@@ -2448,7 +2482,12 @@
     # sort() is guaranteed to be stable
     ctxs = [repo[r] for r in revs]
     for k, reverse in reversed(keyflags):
-        ctxs.sort(key=_sortkeyfuncs[k], reverse=reverse)
+        func = _sortkeyfuncs[k]
+        if k == b'random' and b'random.seed' in opts:
+            seed = opts[b'random.seed']
+            r = random.Random(seed)
+            func = functools.partial(func, gen=r)
+        ctxs.sort(key=func, reverse=reverse)
     return baseset([c.rev() for c in ctxs])
 
 
--- a/tests/test-revset.t	Thu Aug 25 05:12:25 2022 +0200
+++ b/tests/test-revset.t	Mon Jul 25 05:30:06 2022 +0200
@@ -2974,6 +2974,25 @@
   1 b11  m12  u111 112 7200
   0 b12  m111 u112 111 10800
 
+random sort
+
+  $ hg log --rev 'sort(all(), "random")' | wc -l
+  \s*8 (re)
+  $ hg log --rev 'sort(all(), "-random")' | wc -l
+  \s*8 (re)
+  $ hg log --rev 'sort(all(), "random", random.seed=celeste)'
+  6 b111 t2   tu   130 0
+  7 b111 t3   tu   130 0
+  4 b111 m112 u111 110 14400
+  3 b112 m111 u11  120 0
+  5 b111 t1   tu   130 0
+  0 b12  m111 u112 111 10800
+  1 b11  m12  u111 112 7200
+  2 b111 m11  u12  111 3600
+  $ hg log --rev 'first(sort(all(), "random", random.seed=celeste))'
+  6 b111 t2   tu   130 0
+
+
 topographical sorting can't be combined with other sort keys, and you can't
 use the topo.firstbranch option when topo sort is not active: