Hi, I am new to Slurm, so I apologize if I am missing something obvious.
I have set up a Rocks cluster with 8 GPU nodes; each node has an NVIDIA K80, which I have configured with a gres.conf file. I can submit jobs to the nodes and they run fine. However, each K80 registers as two GPUs, and I have this configured in gres.conf with:

  NodeName=compute-0-[0-7] Name=gpu File=/dev/nvidia0 CPUs=0,1,2,3,4,5,6,7,8,9
  NodeName=compute-0-[0-7] Name=gpu File=/dev/nvidia1 CPUs=10,11,12,13,14,15,16,17,18,19

My understanding is that this should allow each node to run two jobs using 1 GPU each. However, when I submit with

  srun --mpi=pmi2 --gres=gpu:1 -N1 -n2 --shared prjmh_temper_cuda_buck > ./out.log

only one job per K80 card (i.e., one per node) gets scheduled. I must be doing something wrong… Any help would be greatly appreciated.

Thanks,
Jan

$ scontrol show job 80
JobId=80 JobName=run_CUDA_poly_s04
   UserId=jorgeq(502) GroupId=jorgeq(502)
   Priority=10052 Nice=0 Account=(null) QOS=normal WCKey=*
   JobState=RUNNING Reason=None Dependency=(null)
   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
   RunTime=3-21:34:59 TimeLimit=7-00:00:00 TimeMin=N/A
   SubmitTime=2015-09-08T15:55:16 EligibleTime=2015-09-08T15:55:16
   StartTime=2015-09-08T15:55:16 EndTime=2015-09-15T15:55:16
   PreemptTime=None SuspendTime=None SecsPreSuspend=0
   Partition=CLUSTER AllocNode:Sid=kispiox:31488
   ReqNodeList=(null) ExcNodeList=(null)
   NodeList=compute-0-0
   BatchHost=compute-0-0
   NumNodes=1 NumCPUs=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
   TRES=cpu=1
   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
   MinCPUsNode=1 MinMemoryNode=32000M MinTmpDiskNode=0
   Features=(null) Gres=gpu:1 Reservation=(null)
   Shared=1 Contiguous=0 Licenses=(null) Network=(null)
   Command=/home/jorgeq/gradient_study/site04_75Hz/run_CUDA_poly_s04
   WorkDir=/home/jorgeq/gradient_study/site04_75Hz
   StdErr=/home/jorgeq/gradient_study/site04_75Hz/slurm-80.out
   StdIn=/dev/null
   StdOut=/home/jorgeq/gradient_study/site04_75Hz/slurm-80.out
   Power= SICP=0

$ scontrol show node
NodeName=compute-0-0 Arch=x86_64 CoresPerSocket=10
   CPUAlloc=1 CPUErr=0 CPUTot=20 CPULoad=2.00
   Features=rack-0,20CPUs,'2GPUs'
   Gres=gpu:2
   NodeAddr=10.1.255.253 NodeHostName=compute-0-0 Version=14.11
   OS=Linux RealMemory=32000 AllocMem=32000 FreeMem=0
   Sockets=2 Boards=1
   State=MIXED ThreadsPerCore=1 TmpDisk=91874 Weight=20500100 Owner=N/A
   BootTime=2015-09-07T11:55:26 SlurmdStartTime=2015-09-10T17:03:55
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

$ scontrol show partition
PartitionName=CLUSTER
   AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
   AllocNodes=kispiox Default=YES QoS=N/A
   DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
   MaxNodes=UNLIMITED MaxTime=UNLIMITED MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
   Nodes=compute-0-[0-7,9]
   Priority=1 RootOnly=NO ReqResv=NO Shared=FORCE:4 PreemptMode=OFF
   State=UP TotalCPUs=184 TotalNodes=9 SelectTypeParameters=N/A
   DefMemPerNode=UNLIMITED MaxMemPerNode=UNLIMITED
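In case it helps narrow this down, here is a minimal two-job test I can run (the node name is just an example; my understanding from the documentation is that Slurm sets CUDA_VISIBLE_DEVICES for each job from the allocated gres, so if sharing worked I would expect one job to print 0 and the other 1):

  # Two single-GPU jobs aimed at the same node. If gres scheduling
  # works the way I expect, both should start immediately and each
  # should see a different device. (compute-0-0 is just an example.)
  srun --gres=gpu:1 -N1 -n1 -w compute-0-0 bash -c 'echo $CUDA_VISIBLE_DEVICES' &
  srun --gres=gpu:1 -N1 -n1 -w compute-0-0 bash -c 'echo $CUDA_VISIBLE_DEVICES' &
  wait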
$ cat slurm.conf
SlurmUser=root
SlurmdUser=root
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
CryptoType=crypto/munge
StateSaveLocation=/var/spool/slurm.state
SlurmdSpoolDir=/var/spool/slurmd
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
ProctrackType=proctrack/linuxproc
PluginDir=/usr/lib64/slurm
CacheGroups=0
JobCheckpointDir=/var/spool/slurm.checkpoint
#SallocDefaultCommand = "xterm"
GresTypes=gpu
#FirstJobId=
ReturnToService=2
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
TaskPlugin=task/affinity
TrackWCKey=yes
TopologyPlugin=topology/none
#TreeWidth=50
TmpFs=/state/partition1
#UsePAM=
SlurmctldTimeout=300
SlurmdTimeout=40
InactiveLimit=30
MinJobAge=300
KillWait=30
WaitTime=30
SelectType=select/cons_res
SelectTypeParameters=CR_Core_Memory
#DefMemPerCPU=220
#MaxMemPerCPU=300
VSizeFactor=90
FastSchedule=0
PriorityType=priority/multifactor
PriorityDecayHalfLife=14-0
PriorityWeightFairshare=10000
PriorityWeightAge=1000
PriorityWeightPartition=10000
PriorityWeightJobSize=1000
PriorityMaxAge=1-0
PriorityWeightQOS=10000
SlurmctldDebug=3
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdDebug=3
SlurmdLogFile=/var/log/slurm/slurmd.log
JobCompType=jobcomp/none
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30
NodeName=DEFAULT State=UNKNOWN
NodeName=kispiox NodeAddr=10.1.1.1
PartitionName=DEFAULT AllocNodes=kispiox State=UP
PartitionName=DEBUG
################ Do not edit below #############################################################
include /etc/slurm/headnode.conf
include /etc/slurm/nodenames.conf
include /etc/slurm/partitions.conf
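For completeness, the batch wrapper behind a job like job 80 looks roughly like this (reconstructed, so treat the details as approximate; the executable name is a placeholder, and the directives echo what scontrol reports above):

  #!/bin/bash
  #SBATCH --job-name=run_CUDA_poly_s04
  #SBATCH --partition=CLUSTER
  #SBATCH --nodes=1
  #SBATCH --ntasks=1          # matches NumCPUs=1 in the scontrol output
  #SBATCH --gres=gpu:1        # one of the two logical GPUs on the K80
  #SBATCH --mem=32000         # matches MinMemoryNode=32000M above
  #SBATCH --time=7-00:00:00   # matches TimeLimit=7-00:00:00 above
  #SBATCH --share
  ./cuda_solver_binary        # placeholder for the actual CUDA executable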
