I agree completely — the existing --user-shutdown flag is exactly why I
kept the naming consistent with it. I'll be pushing this to master then;
thanks for the review.

On 9 January 2017 at 11:40, 'Viktor Bachraty' via ganeti-devel
<[email protected]> wrote:
> Thanks for the patch! LGTM, just one minor comment. Personally I'd prefer
> '--enable-predictive-queue' rather than '--predictive-queue' as it makes it
> clear that it's a boolean flag (and it's consistent with the variable name).
> However probably it's better to keep the convention of existing flags (e.g.
> --user-shutdown).
>
> On Monday, January 9, 2017 at 11:18:01 AM UTC, Federico Pareschi wrote:
>>
>> This commit adds the enabled_predictive_queue cluster parameter that
>> allows the cluster to specify if the predictive scheduler should be used
>> or not when ordering ganeti jobs in the queue. It also adds the correct
>> command line flags for the cluster init and cluster modify ganeti jobs.
>>
>> As a default, if no option is specified upon cluster creation, the
>> predictive scheduler is automatically enabled.
>>
>> Signed-off-by: Federico Morg Pareschi <[email protected]>
>> ---
>>  lib/bootstrap.py               |  3 ++-
>>  lib/cli_opts.py                |  8 ++++++++
>>  lib/client/gnt_cluster.py      | 17 ++++++++++++++---
>>  lib/cmdlib/cluster/__init__.py |  3 +++
>>  lib/objects.py                 |  1 +
>>  man/gnt-cluster.rst            | 10 ++++++++++
>>  src/Ganeti/JQScheduler.hs      | 20 ++++++++++++++++----
>>  src/Ganeti/Objects.hs          |  1 +
>>  src/Ganeti/OpCodes.hs          |  1 +
>>  src/Ganeti/OpParams.hs         |  7 +++++++
>>  src/Ganeti/Query/Server.hs     |  2 ++
>>  test/hs/Test/Ganeti/OpCodes.hs |  1 +
>>  12 files changed, 66 insertions(+), 8 deletions(-)
>>
>> diff --git a/lib/bootstrap.py b/lib/bootstrap.py
>> index 2f98fdd66..c4d987151 100644
>> --- a/lib/bootstrap.py
>> +++ b/lib/bootstrap.py
>> @@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint:
>> disable=R0913, R0914
>>                  use_external_mip_script=False, hv_state=None,
>> disk_state=None,
>>                  enabled_disk_templates=None, install_image=None,
>>                  zeroing_image=None, compression_tools=None,
>> -                enabled_user_shutdown=False):
>> +                enabled_user_shutdown=False,
>> enabled_predictive_queue=True):
>>    """Initialise the cluster.
>>
>>    @type candidate_pool_size: int
>> @@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint:
>> disable=R0913, R0914
>>      enabled_user_shutdown=enabled_user_shutdown,
>>      ssh_key_type=ssh_key_type,
>>      ssh_key_bits=ssh_key_bits,
>> +    enabled_predictive_queue=enabled_predictive_queue,
>>      )
>>    master_node_config = objects.Node(name=hostname.name,
>>                                      primary_ip=hostname.ip,
>> diff --git a/lib/cli_opts.py b/lib/cli_opts.py
>> index f90594a4d..984536ad2 100644
>> --- a/lib/cli_opts.py
>> +++ b/lib/cli_opts.py
>> @@ -88,6 +88,7 @@ __all__ = [
>>    "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT",
>>    "ENABLED_DISK_TEMPLATES_OPT",
>>    "ENABLED_HV_OPT",
>> +  "ENABLED_PREDICTIVE_QUEUE_OPT",
>>    "ENABLED_USER_SHUTDOWN_OPT",
>>    "ERROR_CODES_OPT",
>>    "EXT_PARAMS_OPT",
>> @@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT =
>> cli_option("--enabled-disk-templates",
>>                                               "disk templates",
>>                                          type="string", default=None)
>>
>> +ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue",
>> +                                          default=None,
>> +
>> dest="enabled_predictive_queue",
>> +                                          help="Whether the predictive
>> queue is "
>> +                                               "enabled",
>> +                                          type="bool")
>> +
>>  ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown",
>>                                         default=None,
>>                                         dest="enabled_user_shutdown",
>> diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py
>> index cbb3af14f..15208cad7 100644
>> --- a/lib/client/gnt_cluster.py
>> +++ b/lib/client/gnt_cluster.py
>> @@ -309,6 +309,11 @@ def InitCluster(opts, args):
>>
>>    enabled_user_shutdown = bool(opts.enabled_user_shutdown)
>>
>> +  if opts.enabled_predictive_queue is not None:
>> +    enabled_predictive_queue = bool(opts.enabled_predictive_queue)
>> +  else:
>> +    enabled_predictive_queue = True # Predictive queue is enabled by
>> default.
>> +
>>    if opts.ssh_key_type:
>>      ssh_key_type = opts.ssh_key_type
>>    else:
>> @@ -353,6 +358,7 @@ def InitCluster(opts, args):
>>                          enabled_user_shutdown=enabled_user_shutdown,
>>                          ssh_key_type=ssh_key_type,
>>                          ssh_key_bits=ssh_key_bits,
>> +
>> enabled_predictive_queue=enabled_predictive_queue,
>>                          )
>>    op = opcodes.OpClusterPostInit()
>>    SubmitOpCode(op, opts=opts)
>> @@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args):
>>        ("modify ssh setup", result["modify_ssh_setup"]),
>>        ("ssh_key_type", result["ssh_key_type"]),
>>        ("ssh_key_bits", result["ssh_key_bits"]),
>> +      ("enabled predictive queue", result["enabled_predictive_queue"])
>>        ]),
>>
>>      ("Default node parameters",
>> @@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args):
>>            opts.maint_balance_threshold is not None or
>>            opts.data_collector_interval or
>>            opts.diagnose_data_collector_filename is not None or
>> -          opts.enabled_data_collectors):
>> +          opts.enabled_data_collectors or
>> +          opts.enabled_predictive_queue is not None):
>>      ToStderr("Please give at least one of the parameters.")
>>      return 1
>>
>> @@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args):
>>      maint_balance_threshold=opts.maint_balance_threshold,
>>      enabled_data_collectors=enabled_data_collectors,
>>      data_collector_interval=data_collector_interval,
>> -
>> diagnose_data_collector_filename=opts.diagnose_data_collector_filename
>> +
>> diagnose_data_collector_filename=opts.diagnose_data_collector_filename,
>> +    enabled_predictive_queue=opts.enabled_predictive_queue
>>      )
>>    return base.GetResult(None, opts, SubmitOrSend(op, opts))
>>
>> @@ -2506,6 +2515,7 @@ commands = {
>>       IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT,
>> INSTALL_IMAGE_OPT,
>>       ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
>>       ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT,
>> +     ENABLED_PREDICTIVE_QUEUE_OPT,
>>       ]
>>       + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
>>      "[opts...] <cluster_name>", "Initialises a new cluster
>> configuration"),
>> @@ -2591,7 +2601,8 @@ commands = {
>>       PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
>>       DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
>>       [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT,
>> MODIFY_ETCHOSTS_OPT,
>> -      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] +
>> +      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT,
>> +      ENABLED_PREDICTIVE_QUEUE_OPT] +
>>       INSTANCE_POLICY_OPTS +
>>       [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
>>        COMPRESSION_TOOLS_OPT] +
>> diff --git a/lib/cmdlib/cluster/__init__.py
>> b/lib/cmdlib/cluster/__init__.py
>> index 28370d90a..455c1de15 100644
>> --- a/lib/cmdlib/cluster/__init__.py
>> +++ b/lib/cmdlib/cluster/__init__.py
>> @@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit):
>>        self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown
>>        ensure_kvmd = True
>>
>> +    if self.op.enabled_predictive_queue is not None:
>> +      self.cluster.enabled_predictive_queue =
>> self.op.enabled_predictive_queue
>> +
>>      def helper_os(aname, mods, desc):
>>        desc += " OS list"
>>        lst = getattr(self.cluster, aname)
>> diff --git a/lib/objects.py b/lib/objects.py
>> index 7e20fc2cb..df0494c27 100644
>> --- a/lib/objects.py
>> +++ b/lib/objects.py
>> @@ -1710,6 +1710,7 @@ class Cluster(TaggableObject):
>>      "diagnose_data_collector_filename",
>>      "ssh_key_type",
>>      "ssh_key_bits",
>> +    "enabled_predictive_queue",
>>      ] + _TIMESTAMPS + _UUID
>>
>>    def UpgradeConfig(self):
>> diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst
>> index 0469f7598..261019db8 100644
>> --- a/man/gnt-cluster.rst
>> +++ b/man/gnt-cluster.rst
>> @@ -209,6 +209,7 @@ INIT
>>  | [\--user-shutdown {yes \| no}]
>>  | [\--ssh-key-type *type*]
>>  | [\--ssh-key-bits *bits*]
>> +| [\--predictive-queue {yes \| no}]
>>  | {*clustername*}
>>
>>  This commands is only run once initially on the first node of the
>> @@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are
>> currently:
>>
>>  Ganeti defaults to using 2048-bit RSA keys.
>>
>> +The ``--predictive-queue`` option enables or disables the predictive
>> +queue algorithm for the job scheduler. If this option is not specified,
>> +Ganeti defaults to enabling the predictive scheduler.
>> +
>>  MASTER-FAILOVER
>>  ~~~~~~~~~~~~~~~
>>
>> @@ -751,6 +756,8 @@ MODIFY
>>  | [\--auto-balance-cluster {yes \| no }]
>>  | [\--auto-balance-threshold *score* ]
>>  | [\--diagnose-data-collector-filename *filename*]
>> +| [\--predictive-queue {yes \| no}]
>> +
>>
>>
>>  Modify the options for the cluster.
>> @@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at
>> least 10 times that
>>  value, in which case all beneficial steps will be done if auto-balancing
>>  is enabled.
>>
>> +The ``--predictive-queue`` option enables or disables the predictive
>> +queue algorithm for the job scheduler.
>> +
>>  See **gnt-cluster init** for a description of ``--install-image`` and
>>  ``--zeroing-image``.
>>
>> diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs
>> index 5c79843a8..bfa78745f 100644
>> --- a/src/Ganeti/JQScheduler.hs
>> +++ b/src/Ganeti/JQScheduler.hs
>> @@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue
>> clusterMaxRunningJobs 1
>>  getMaxTrackedJobs :: JQStatus -> IO Int
>>  getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1
>>
>> +-- | Get the boolean that specifies whether or not the predictive queue
>> +-- scheduler is enabled in the cluster. If the configuration is not
>> available,
>> +-- the predictive queue is enabled by default.
>> +getEnabledPredictiveQueue :: JQStatus -> IO Bool
>> +getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue
>> True
>> +
>>  -- | Get the number of jobs currently running.
>>  getRQL :: JQStatus -> IO Int
>>  getRQL = liftM (length . qRunning) . readIORef . jqJobs
>> @@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy
>> (compare `on` opWeight)
>>  -- pure function doing the scheduling.
>>  selectJobsToRun :: ConfigData
>>                  -> Int -- How many jobs are allowed to run at the same
>> time.
>> +                -> Bool -- If the predictive scheduler is enabled
>>                  -> Timestamp -- Current time
>>                  -> Set FilterRule -- Filter rules to respect for
>> scheduling
>>                  -> Queue
>>                  -> (Queue, [JobWithStat])
>> -selectJobsToRun cfg count currTime filters queue =
>> +selectJobsToRun cfg count isPredictive currTime filters queue =
>>    let n = count - length (qRunning queue) - length (qManipulated queue)
>> +      pickScheduler = if isPredictive
>> +                         then sortByStaticLocks cfg queue currTime
>> +                         else id
>>        chosen = take n
>>                 . jobFiltering queue filters
>>                 . reasonRateLimit queue
>>                 . sortBy (comparing (calcJobPriority . jJob))
>>                 . filter (jobEligible queue)
>> -               . sortByStaticLocks cfg queue currTime
>> +               . pickScheduler
>>                 $ qEnqueued queue
>>        remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue)
>> chosen
>>    in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen},
>> chosen)
>> @@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do
>>
>>        -- Select the jobs to run.
>>        count <- getMaxRunningJobs qstate
>> -      chosen <- atomicModifyIORef (jqJobs qstate)
>> -                                  (selectJobsToRun cfg count ts filters)
>> +      isPredictive <- getEnabledPredictiveQueue qstate
>> +      let jobsToRun = selectJobsToRun cfg count isPredictive ts filters
>> +      chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun
>> +
>>        let jobs = map jJob chosen
>>        unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin
>>          $ map (show . fromJobId . qjId) jobs
>> diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs
>> index 5be8adfe5..572dc662f 100644
>> --- a/src/Ganeti/Objects.hs
>> +++ b/src/Ganeti/Objects.hs
>> @@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $
>>        "diagnose_data_collector_filename"         [t| String
>> |]
>>    , simpleField "ssh_key_type"                   [t| SshKeyType
>> |]
>>    , simpleField "ssh_key_bits"                   [t| Int
>> |]
>> +  , simpleField "enabled_predictive_queue"       [t| Bool
>> |]
>>   ]
>>   ++ timeStampFields
>>   ++ uuidFields
>> diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs
>> index c4da480a8..811d59d66 100644
>> --- a/src/Ganeti/OpCodes.hs
>> +++ b/src/Ganeti/OpCodes.hs
>> @@ -271,6 +271,7 @@ $(genOpCode "OpCode"
>>       , pMaintdRoundDelay
>>       , pMaintdEnableBalancing
>>       , pMaintdBalancingThreshold
>> +     , pEnabledPredictiveQueue
>>       ],
>>       [])
>>    , ("OpClusterRedistConf",
>> diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs
>> index b5a561953..f35ae4513 100644
>> --- a/src/Ganeti/OpParams.hs
>> +++ b/src/Ganeti/OpParams.hs
>> @@ -321,6 +321,7 @@ module Ganeti.OpParams
>>    , pVerifyClutter
>>    , pLongSleep
>>    , pIsStrict
>> +  , pEnabledPredictiveQueue
>>    ) where
>>
>>  import Control.Monad (liftM, mplus)
>> @@ -2030,3 +2031,9 @@ pIsStrict =
>>    withDoc "Whether the operation is in strict mode or not." .
>>    defaultField [| True |] $
>>    simpleField "is_strict" [t| Bool |]
>> +
>> +pEnabledPredictiveQueue :: Field
>> +pEnabledPredictiveQueue =
>> +  withDoc "Whether the predictive queue is enabled in the cluster." .
>> +  optionalField $
>> +  simpleField "enabled_predictive_queue" [t| Bool |]
>> diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs
>> index aefe129c5..8cef6cc2b 100644
>> --- a/src/Ganeti/Query/Server.hs
>> +++ b/src/Ganeti/Query/Server.hs
>> @@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo =
>>                 showJSON $ clusterModifySshSetup cluster)
>>              , ("ssh_key_type", showJSON $ clusterSshKeyType cluster)
>>              , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster)
>> +            , ("enabled_predictive_queue",
>> +               showJSON $ clusterEnabledPredictiveQueue cluster)
>>              ]
>>
>>    in case master of
>> diff --git a/test/hs/Test/Ganeti/OpCodes.hs
>> b/test/hs/Test/Ganeti/OpCodes.hs
>> index 48a468345..7d39b6619 100644
>> --- a/test/hs/Test/Ganeti/OpCodes.hs
>> +++ b/test/hs/Test/Ganeti/OpCodes.hs
>> @@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg =
>>          <*> genMaybe (fromPositive <$> arbitrary) -- maintd round
>> interval
>>          <*> genMaybe arbitrary           -- enable maintd balancing
>>          <*> genMaybe arbitrary           -- maintd balancing threshold
>> +        <*> arbitrary                    -- enabled_predictive_queue
>>      "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf
>>      "OP_CLUSTER_ACTIVATE_MASTER_IP" ->
>>        pure OpCodes.OpClusterActivateMasterIp
>> --
>> 2.11.0.390.gc69c2f50cf-goog
>>
>

Reply via email to