Hi,

We have two different clusters, one running slurm-13.12.0-0pre4 and the other running slurm-2.4.1-1.


While I am able to see the start time of a pending job with the scontrol command on the cluster running slurm-2.4.1-1, scontrol shows "StartTime=Unknown" on the cluster running slurm-13.12.0-0pre4.

On both clusters we use "SchedulerType=sched/backfill", and all users have to specify the "--time" parameter in their batch jobs.


Am I missing something in our configuration?

Thanks...

Configuration data as of 2014-12-11T14:03:40
AccountingStorageBackupHost = (null)
AccountingStorageEnforce = associations,limits
AccountingStorageHost   = slurmcontroller3
AccountingStorageLoc    = N/A
AccountingStoragePort   = 6819
AccountingStorageType   = accounting_storage/slurmdbd
AccountingStorageUser   = N/A
AccountingStoreJobComment = YES
AcctGatherEnergyType    = acct_gather_energy/none
AcctGatherFilesystemType = acct_gather_filesystem/none
AcctGatherInfinibandType = acct_gather_infiniband/none
AcctGatherNodeFreq      = 0 sec
AcctGatherProfileType   = acct_gather_profile/none
AuthType                = auth/munge
BackupAddr              = (null)
BackupController        = (null)
BatchStartTimeout       = 10 sec
BOOT_TIME               = 2014-12-08T16:43:29
CacheGroups             = 1
CheckpointType          = checkpoint/blcr
ClusterName             = linux
CompleteWait            = 0 sec
ControlAddr             = slurmcontroller3
ControlMachine          = slurmcontroller3
CryptoType              = crypto/munge
DebugFlags              = NO_CONF_HASH
DefMemPerNode           = UNLIMITED
DisableRootJobs         = NO
DynAllocPort            = 0
EnforcePartLimits       = YES
Epilog                  = (null)
EpilogMsgTime           = 2000 usec
EpilogSlurmctld         = (null)
ExtSensorsType          = ext_sensors/none
ExtSensorsFreq          = 0 sec
FairShareDampeningFactor = 1
FastSchedule            = 1
FirstJobId              = 100000
GetEnvTimeout           = 2 sec
GresTypes               = gpu
GroupUpdateForce        = 0
GroupUpdateTime         = 600 sec
HASH_VAL                = Match
HealthCheckInterval     = 0 sec
HealthCheckNodeState    = ANY
HealthCheckProgram      = (null)
InactiveLimit           = 0 sec
JobAcctGatherFrequency  = 30
JobAcctGatherType       = jobacct_gather/linux
JobAcctGatherParams     = (null)
JobCheckpointDir        = /tmp/slurmcheckpoint
JobCompHost             = localhost
JobCompLoc              = /var/log/slurm/job_completions
JobCompPort             = 0
JobCompType             = jobcomp/filetxt
JobCompUser             = root
JobContainerPlugin      = job_container/none
JobCredentialPrivateKey = (null)
JobCredentialPublicCertificate = (null)
JobFileAppend           = 0
JobRequeue              = 1
JobSubmitPlugins        = lua
KeepAliveTime           = SYSTEM_DEFAULT
KillOnBadExit           = 0
KillWait                = 30 sec
LaunchType              = launch/slurm
Licenses                = (null)
LicensesUsed            = (null)
MailProg                = /bin/mail
MaxArraySize            = 65000
MaxJobCount             = 1000000
MaxJobId                = 4294901760
MaxMemPerNode           = UNLIMITED
MaxStepCount            = 40000
MaxTasksPerNode         = 128
MessageTimeout          = 10 sec
MinJobAge               = 300 sec
MpiDefault              = none
MpiParams               = (null)
NEXT_JOB_ID             = 419259
OverTimeLimit           = 0 min
PluginDir               = /usr/lib64/slurm
PlugStackConfig         = /etc/slurm/plugstack.conf
PreemptMode             = GANG,SUSPEND
PreemptType             = preempt/partition_prio
PriorityDecayHalfLife   = 00:00:00
PriorityCalcPeriod      = 00:05:00
PriorityFavorSmall      = 1
PriorityFlags           = 0
PriorityMaxAge          = 14-00:00:00
PriorityUsageResetPeriod = NONE
PriorityType            = priority/multifactor
PriorityWeightAge       = 1000
PriorityWeightFairShare = 0
PriorityWeightJobSize   = 1000
PriorityWeightPartition = 1000
PriorityWeightQOS       = 10000
PrivateData             = jobs
ProctrackType           = proctrack/cgroup
Prolog                  = (null)
PrologSlurmctld         = (null)
PropagatePrioProcess    = 0
PropagateResourceLimits = (null)
PropagateResourceLimitsExcept = MEMLOCK
RebootProgram           = (null)
ReconfigFlags           = (null)
ResumeProgram           = (null)
ResumeRate              = 300 nodes/min
ResumeTimeout           = 60 sec
ResvEpilog              = (null)
ResvOverRun             = 0 min
ResvProlog              = (null)
ReturnToService         = 2
SallocDefaultCommand    = (null)
SchedulerParameters     = (null)
SchedulerPort           = 7321
SchedulerRootFilter     = 1
SchedulerTimeSlice      = 30 sec
SchedulerType           = sched/backfill
SelectType              = select/cons_res
SelectTypeParameters    = CR_CPU_MEMORY
SlurmUser               = root(0)
SlurmctldDebug          = debug5
SlurmctldLogFile        = /var/log/slurm/slurmctld.log
SlurmSchedLogFile       = (null)
SlurmctldPort           = 6816-6817
SlurmctldTimeout        = 300 sec
SlurmdDebug             = info
SlurmdLogFile           = /var/log/slurm/slurmd.log
SlurmdPidFile           = /var/run/slurmd.pid
SlurmdPlugstack         = (null)
SlurmdPort              = 6818
SlurmdSpoolDir          = /tmp/slurmd
SlurmdTimeout           = 300 sec
SlurmdUser              = root(0)
SlurmSchedLogLevel      = 0
SlurmctldPidFile        = /var/run/slurmctld.pid
SlurmctldPlugstack      = (null)
SLURM_CONF              = /etc/slurm/slurm.conf
SLURM_VERSION           = 13.12.0-0pre4
SrunEpilog              = (null)
SrunProlog              = (null)
StateSaveLocation       = /slurm.state
SuspendExcNodes         = (null)
SuspendExcParts         = (null)
SuspendProgram          = (null)
SuspendRate             = 60 nodes/min
SuspendTime             = NONE
SuspendTimeout          = 30 sec
SwitchType              = switch/none
TaskEpilog              = (null)
TaskPlugin              = task/cgroup
TaskPluginParam         = (null type)
TaskProlog              = (null)
TmpFS                   = /tmp
TopologyPlugin          = topology/none
TrackWCKey              = 0
TreeWidth               = 50
UsePam                  = 0
UnkillableStepProgram   = (null)
UnkillableStepTimeout   = 60 sec
VSizeFactor             = 0 percent
WaitTime                = 0 sec

Reply via email to