changeset 23814:6a5877a73141

setdiscovery: drop the 'always' argument to '_updatesample' This argument exists because of the complex code flow in '_takequicksample'. It first gets the list of heads and then calls '_updatesample' on an empty initial sample, with a size limit equal to the difference between the target sample size and the number of heads. Finally, the heads and the sample from '_updatesample' are combined. To ensure that the combined result has exactly the target length, the code has to ensure that no elements from the heads are added to the '_updatesample' content, and therefore passes this "always included" set of heads. Instead, we can just update the initial heads sample directly and use the final target size as the size limit for the update. This removes the need for the 'always' parameter to the '_updatesample' function. The tests are affected because a different set-building order results in different random sampling.
author Pierre-Yves David <pierre-yves.david@fb.com>
date Wed, 07 Jan 2015 10:32:17 -0800
parents 932f814bf016
children 31e75a362d44
files mercurial/setdiscovery.py
diffstat 1 files changed, 9 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/setdiscovery.py	Wed Jan 07 17:28:51 2015 -0800
+++ b/mercurial/setdiscovery.py	Wed Jan 07 10:32:17 2015 -0800
@@ -45,7 +45,7 @@
 import random
 import util, dagutil
 
-def _updatesample(dag, nodes, sample, always, quicksamplesize=0):
+def _updatesample(dag, nodes, sample, quicksamplesize=0):
     """update an existing sample to match the expected size
 
     The sample is updated with nodes exponentially distant from each head of the
@@ -58,7 +58,6 @@
     :dag: a dag object from dagutil
     :nodes:  set of nodes we want to discover (if None, assume the whole dag)
     :sample: a sample to update
-    :always: set of notable nodes that will be part of the sample anyway
     :quicksamplesize: optional target size of the sample"""
     # if nodes is empty we scan the entire graph
     if nodes:
@@ -77,10 +76,9 @@
         if d > factor:
             factor *= 2
         if d == factor:
-            if curr not in always: # need this check for the early exit below
-                sample.add(curr)
-                if quicksamplesize and (len(sample) >= quicksamplesize):
-                    return
+            sample.add(curr)
+            if quicksamplesize and (len(sample) >= quicksamplesize):
+                return
         seen.add(curr)
         for p in dag.parents(curr):
             if not nodes or p in nodes:
@@ -100,18 +98,17 @@
     always, sample, desiredlen = _setupsample(dag, nodes, size)
     if sample is None:
         return always
-    _updatesample(dag, None, sample, always, quicksamplesize=desiredlen)
-    sample.update(always)
+    sample = always
+    _updatesample(dag, None, sample, quicksamplesize=size)
     return sample
 
 def _takefullsample(dag, nodes, size):
-    sample = always = dag.headsetofconnecteds(nodes)
+    sample = dag.headsetofconnecteds(nodes)
     # update from heads
-    _updatesample(dag, nodes, sample, always)
+    _updatesample(dag, nodes, sample)
     # update from roots
-    _updatesample(dag.inverse(), nodes, sample, always)
+    _updatesample(dag.inverse(), nodes, sample)
     assert sample
-    sample.update(always)
     sample = _limitsample(sample, size)
     if len(sample) < size:
         more = size - len(sample)