Thanks for the patch! LGTM, just one minor comment. Personally I'd prefer 
'--enable-predictive-queue' rather than '--predictive-queue' as it makes it 
clear that it's a boolean flag (and it's consistent with the variable 
name). However probably it's better to keep the convention of existing 
flags (e.g. --user-shutdown).

On Monday, January 9, 2017 at 11:18:01 AM UTC, Federico Pareschi wrote:
>
> This commit adds the enabled_predictive_queue cluster parameter that 
> allows the cluster to specify if the predictive scheduler should be used 
> or not when ordering ganeti jobs in the queue. It also adds the correct 
> command line flags for the cluster init and cluster modify ganeti jobs. 
>
> As a default, if no option is specified upon cluster creation, the 
> predictive scheduler is automatically enabled. 
>
> Signed-off-by: Federico Morg Pareschi <[email protected]> 
> --- 
>  lib/bootstrap.py               |  3 ++- 
>  lib/cli_opts.py                |  8 ++++++++ 
>  lib/client/gnt_cluster.py      | 17 ++++++++++++++--- 
>  lib/cmdlib/cluster/__init__.py |  3 +++ 
>  lib/objects.py                 |  1 + 
>  man/gnt-cluster.rst            | 10 ++++++++++ 
>  src/Ganeti/JQScheduler.hs      | 20 ++++++++++++++++---- 
>  src/Ganeti/Objects.hs          |  1 + 
>  src/Ganeti/OpCodes.hs          |  1 + 
>  src/Ganeti/OpParams.hs         |  7 +++++++ 
>  src/Ganeti/Query/Server.hs     |  2 ++ 
>  test/hs/Test/Ganeti/OpCodes.hs |  1 + 
>  12 files changed, 66 insertions(+), 8 deletions(-) 
>
> diff --git a/lib/bootstrap.py b/lib/bootstrap.py 
> index 2f98fdd66..c4d987151 100644 
> --- a/lib/bootstrap.py 
> +++ b/lib/bootstrap.py 
> @@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: 
> disable=R0913, R0914 
>                  use_external_mip_script=False, hv_state=None, 
> disk_state=None, 
>                  enabled_disk_templates=None, install_image=None, 
>                  zeroing_image=None, compression_tools=None, 
> -                enabled_user_shutdown=False): 
> +                enabled_user_shutdown=False, 
> enabled_predictive_queue=True): 
>    """Initialise the cluster. 
>   
>    @type candidate_pool_size: int 
> @@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: 
> disable=R0913, R0914 
>      enabled_user_shutdown=enabled_user_shutdown, 
>      ssh_key_type=ssh_key_type, 
>      ssh_key_bits=ssh_key_bits, 
> +    enabled_predictive_queue=enabled_predictive_queue, 
>      ) 
>    master_node_config = objects.Node(name=hostname.name, 
>                                      primary_ip=hostname.ip, 
> diff --git a/lib/cli_opts.py b/lib/cli_opts.py 
> index f90594a4d..984536ad2 100644 
> --- a/lib/cli_opts.py 
> +++ b/lib/cli_opts.py 
> @@ -88,6 +88,7 @@ __all__ = [ 
>    "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT", 
>    "ENABLED_DISK_TEMPLATES_OPT", 
>    "ENABLED_HV_OPT", 
> +  "ENABLED_PREDICTIVE_QUEUE_OPT", 
>    "ENABLED_USER_SHUTDOWN_OPT", 
>    "ERROR_CODES_OPT", 
>    "EXT_PARAMS_OPT", 
> @@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT = 
> cli_option("--enabled-disk-templates", 
>                                               "disk templates", 
>                                          type="string", default=None) 
>   
> +ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue", 
> +                                          default=None, 
> +                                         
>  dest="enabled_predictive_queue", 
> +                                          help="Whether the predictive 
> queue is" 
> +                                               "enabled", 
> +                                          type="bool") 
> + 
>  ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown", 
>                                         default=None, 
>                                         dest="enabled_user_shutdown", 
> diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py 
> index cbb3af14f..15208cad7 100644 
> --- a/lib/client/gnt_cluster.py 
> +++ b/lib/client/gnt_cluster.py 
> @@ -309,6 +309,11 @@ def InitCluster(opts, args): 
>   
>    enabled_user_shutdown = bool(opts.enabled_user_shutdown) 
>   
> +  if opts.enabled_predictive_queue  is not None: 
> +    enabled_predictive_queue = bool(opts.enabled_predictive_queue) 
> +  else: 
> +    enabled_predictive_queue = True # Predictive queue is enabled by 
> default. 
> + 
>    if opts.ssh_key_type: 
>      ssh_key_type = opts.ssh_key_type 
>    else: 
> @@ -353,6 +358,7 @@ def InitCluster(opts, args): 
>                          enabled_user_shutdown=enabled_user_shutdown, 
>                          ssh_key_type=ssh_key_type, 
>                          ssh_key_bits=ssh_key_bits, 
> +                       
>  enabled_predictive_queue=enabled_predictive_queue, 
>                          ) 
>    op = opcodes.OpClusterPostInit() 
>    SubmitOpCode(op, opts=opts) 
> @@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args): 
>        ("modify ssh setup", result["modify_ssh_setup"]), 
>        ("ssh_key_type", result["ssh_key_type"]), 
>        ("ssh_key_bits", result["ssh_key_bits"]), 
> +      ("enabled predictive queue", result["enabled_predictive_queue"]) 
>        ]), 
>   
>      ("Default node parameters", 
> @@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args): 
>            opts.maint_balance_threshold is not None or 
>            opts.data_collector_interval or 
>            opts.diagnose_data_collector_filename is not None or 
> -          opts.enabled_data_collectors): 
> +          opts.enabled_data_collectors or 
> +          opts.enabled_predictive_queue is not None): 
>      ToStderr("Please give at least one of the parameters.") 
>      return 1 
>   
> @@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args): 
>      maint_balance_threshold=opts.maint_balance_threshold, 
>      enabled_data_collectors=enabled_data_collectors, 
>      data_collector_interval=data_collector_interval, 
> -   
>  diagnose_data_collector_filename=opts.diagnose_data_collector_filename 
> +   
>  diagnose_data_collector_filename=opts.diagnose_data_collector_filename, 
> +    enabled_predictive_queue=opts.enabled_predictive_queue 
>      ) 
>    return base.GetResult(None, opts, SubmitOrSend(op, opts)) 
>   
> @@ -2506,6 +2515,7 @@ commands = { 
>       IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, 
> INSTALL_IMAGE_OPT, 
>       ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT, 
>       ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT, 
> +     ENABLED_PREDICTIVE_QUEUE_OPT, 
>       ] 
>       + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS, 
>      "[opts...] <cluster_name>", "Initialises a new cluster 
> configuration"), 
> @@ -2591,7 +2601,8 @@ commands = { 
>       PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, 
>       DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS + 
>       [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, 
> MODIFY_ETCHOSTS_OPT, 
> -      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] + 
> +      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT, 
> +      ENABLED_PREDICTIVE_QUEUE_OPT] + 
>       INSTANCE_POLICY_OPTS + 
>       [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT, 
>        COMPRESSION_TOOLS_OPT] + 
> diff --git a/lib/cmdlib/cluster/__init__.py 
> b/lib/cmdlib/cluster/__init__.py 
> index 28370d90a..455c1de15 100644 
> --- a/lib/cmdlib/cluster/__init__.py 
> +++ b/lib/cmdlib/cluster/__init__.py 
> @@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit): 
>        self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown 
>        ensure_kvmd = True 
>   
> +    if self.op.enabled_predictive_queue is not None: 
> +      self.cluster.enabled_predictive_queue = 
> self.op.enabled_predictive_queue 
> + 
>      def helper_os(aname, mods, desc): 
>        desc += " OS list" 
>        lst = getattr(self.cluster, aname) 
> diff --git a/lib/objects.py b/lib/objects.py 
> index 7e20fc2cb..df0494c27 100644 
> --- a/lib/objects.py 
> +++ b/lib/objects.py 
> @@ -1710,6 +1710,7 @@ class Cluster(TaggableObject): 
>      "diagnose_data_collector_filename", 
>      "ssh_key_type", 
>      "ssh_key_bits", 
> +    "enabled_predictive_queue", 
>      ] + _TIMESTAMPS + _UUID 
>   
>    def UpgradeConfig(self): 
> diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst 
> index 0469f7598..261019db8 100644 
> --- a/man/gnt-cluster.rst 
> +++ b/man/gnt-cluster.rst 
> @@ -209,6 +209,7 @@ INIT 
>  | [\--user-shutdown {yes \| no}] 
>  | [\--ssh-key-type *type*] 
>  | [\--ssh-key-bits *bits*] 
> +| [\--predictive-queue {yes \| no}] 
>  | {*clustername*} 
>   
>  This commands is only run once initially on the first node of the 
> @@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are 
> currently: 
>   
>  Ganeti defaults to using 2048-bit RSA keys. 
>   
> +The ``--predictive-queue`` option enables or disables the predictive 
> +queue algorithm for the job scheduler. If this option is not specified, 
> +Ganeti defaults to enabling the predictive scheduler. 
> + 
>  MASTER-FAILOVER 
>  ~~~~~~~~~~~~~~~ 
>   
> @@ -751,6 +756,8 @@ MODIFY 
>  | [\--auto-balance-cluster {yes \| no }] 
>  | [\--auto-balance-threshold *score* ] 
>  | [\--diagnose-data-collector-filename *filename*] 
> +| [\--predictive-queue {yes \| no}] 
> + 
>   
>   
>  Modify the options for the cluster. 
> @@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at 
> least 10 times that 
>  value, in which case all beneficial steps will be done if auto-balancing 
>  is enabled. 
>   
> +The ``--predictive-queue`` option enables or disables the predictive 
> +queue algorithm for the job scheduler. 
> + 
>  See **gnt-cluster init** for a description of ``--install-image`` and 
>  ``--zeroing-image``. 
>   
> diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs 
> index 5c79843a8..bfa78745f 100644 
> --- a/src/Ganeti/JQScheduler.hs 
> +++ b/src/Ganeti/JQScheduler.hs 
> @@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue 
> clusterMaxRunningJobs 1 
>  getMaxTrackedJobs :: JQStatus -> IO Int 
>  getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1 
>   
> +-- | Get the boolean that specifies whether or not the predictive queue 
> +-- scheduler is enabled in the cluster. If the configuration is not 
> available, 
> +-- the predictive queue is enabled by default. 
> +getEnabledPredictiveQueue :: JQStatus -> IO Bool 
> +getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue 
> True 
> + 
>  -- | Get the number of jobs currently running. 
>  getRQL :: JQStatus -> IO Int 
>  getRQL = liftM (length . qRunning) . readIORef . jqJobs 
> @@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy 
> (compare `on` opWeight) 
>  -- pure function doing the scheduling. 
>  selectJobsToRun :: ConfigData 
>                  -> Int -- How many jobs are allowed to run at the same 
> time. 
> +                -> Bool -- If the predictive scheduler is enabled 
>                  -> Timestamp -- Current time 
>                  -> Set FilterRule -- Filter rules to respect for 
> scheduling 
>                  -> Queue 
>                  -> (Queue, [JobWithStat]) 
> -selectJobsToRun cfg count currTime filters queue = 
> +selectJobsToRun cfg count isPredictive currTime filters queue = 
>    let n = count - length (qRunning queue) - length (qManipulated queue) 
> +      pickScheduler = if isPredictive 
> +                         then sortByStaticLocks cfg queue currTime 
> +                         else id 
>        chosen = take n 
>                 . jobFiltering queue filters 
>                 . reasonRateLimit queue 
>                 . sortBy (comparing (calcJobPriority . jJob)) 
>                 . filter (jobEligible queue) 
> -               . sortByStaticLocks cfg queue currTime 
> +               . pickScheduler 
>                 $ qEnqueued queue 
>        remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue) 
> chosen 
>    in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen}, 
> chosen) 
> @@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do 
>   
>        -- Select the jobs to run. 
>        count <- getMaxRunningJobs qstate 
> -      chosen <- atomicModifyIORef (jqJobs qstate) 
> -                                  (selectJobsToRun cfg count ts filters) 
> +      isPredictive <- getEnabledPredictiveQueue qstate 
> +      let jobsToRun = selectJobsToRun cfg count isPredictive ts filters 
> +      chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun 
> + 
>        let jobs = map jJob chosen 
>        unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin 
>          $ map (show . fromJobId . qjId) jobs 
> diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs 
> index 5be8adfe5..572dc662f 100644 
> --- a/src/Ganeti/Objects.hs 
> +++ b/src/Ganeti/Objects.hs 
> @@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $ 
>        "diagnose_data_collector_filename"         [t| String               
>    |] 
>    , simpleField "ssh_key_type"                   [t| SshKeyType           
>    |] 
>    , simpleField "ssh_key_bits"                   [t| Int                 
>     |] 
> +  , simpleField "enabled_predictive_queue"       [t| Bool                 
>    |] 
>   ] 
>   ++ timeStampFields 
>   ++ uuidFields 
> diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs 
> index c4da480a8..811d59d66 100644 
> --- a/src/Ganeti/OpCodes.hs 
> +++ b/src/Ganeti/OpCodes.hs 
> @@ -271,6 +271,7 @@ $(genOpCode "OpCode" 
>       , pMaintdRoundDelay 
>       , pMaintdEnableBalancing 
>       , pMaintdBalancingThreshold 
> +     , pEnabledPredictiveQueue 
>       ], 
>       []) 
>    , ("OpClusterRedistConf", 
> diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs 
> index b5a561953..f35ae4513 100644 
> --- a/src/Ganeti/OpParams.hs 
> +++ b/src/Ganeti/OpParams.hs 
> @@ -321,6 +321,7 @@ module Ganeti.OpParams 
>    , pVerifyClutter 
>    , pLongSleep 
>    , pIsStrict 
> +  , pEnabledPredictiveQueue 
>    ) where 
>   
>  import Control.Monad (liftM, mplus) 
> @@ -2030,3 +2031,9 @@ pIsStrict = 
>    withDoc "Whether the operation is in strict mode or not." . 
>    defaultField [| True |] $ 
>    simpleField "is_strict" [t| Bool |] 
> + 
> +pEnabledPredictiveQueue :: Field 
> +pEnabledPredictiveQueue = 
> +  withDoc "Whether the predictive queue is enabled in the cluster." . 
> +  optionalField $ 
> +  simpleField "enabled_predictive_queue" [t| Bool |] 
> diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs 
> index aefe129c5..8cef6cc2b 100644 
> --- a/src/Ganeti/Query/Server.hs 
> +++ b/src/Ganeti/Query/Server.hs 
> @@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo = 
>                 showJSON $ clusterModifySshSetup cluster) 
>              , ("ssh_key_type", showJSON $ clusterSshKeyType cluster) 
>              , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster) 
> +            , ("enabled_predictive_queue", 
> +               showJSON $ clusterEnabledPredictiveQueue cluster) 
>              ] 
>   
>    in case master of 
> diff --git a/test/hs/Test/Ganeti/OpCodes.hs 
> b/test/hs/Test/Ganeti/OpCodes.hs 
> index 48a468345..7d39b6619 100644 
> --- a/test/hs/Test/Ganeti/OpCodes.hs 
> +++ b/test/hs/Test/Ganeti/OpCodes.hs 
> @@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg = 
>          <*> genMaybe (fromPositive <$> arbitrary) -- maintd round 
> interval 
>          <*> genMaybe arbitrary           -- enable maintd balancing 
>          <*> genMaybe arbitrary           -- maintd balancing threshold 
> +        <*> arbitrary                    -- enabled_predictive_queue 
>      "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf 
>      "OP_CLUSTER_ACTIVATE_MASTER_IP" -> 
>        pure OpCodes.OpClusterActivateMasterIp 
> -- 
> 2.11.0.390.gc69c2f50cf-goog 
>
>

Reply via email to