Hi,
I am trying to figure out how to make the following work within our
cluster using torque/maui:
I'd like a user to be able to submit as many jobs as they like.
However, they should only be allowed up to 32 CPUs or 32 GB of memory.
After that, if there are idle resources, the rest of their jobs
can be backfilled on idle nodes.
If another user submits jobs, they should get the same policy and
preempt any backfilled jobs (if that's required to meet the 32-CPU or
memory limit).
So basically, I think this should be a fairly common setup. I want to run as
many jobs as possible on idle resources but only guarantee the jobs
that fall under the MAXPROC/MAXMEM policy. I've implemented the
MAXPROC/MAXMEM policy, but it appears backfill won't work for the
remaining jobs, so I am assuming backfill has to abide by the
MAXPROC/MAXMEM policy I have in place. Can anyone give me some pointers to
the proper way to implement this? Thanks in advance!
-Steve
[root@ maui]# cat maui.cfg (edited for some content)
# maui.cfg 3.2.6p14
# Resource Manager Definition
RMCFG[JAKE] TYPE=PBS
RMPOLLINTERVAL 00:00:30
SERVERPORT 42559
SERVERMODE NORMAL
# Admin: http://clusterresources.com/mauidocs/a.esecurity.html
LOGDIR /var/log/maui
LOGFILE maui.log
LOGFILEMAXSIZE 100000000
#LOGLEVEL 3
LOGLEVEL 2
LOGFILEROLLDEPTH 5
STATDIR /var/log/maui/stats
SERVERHOMEDIR /usr/maui/
TOOLSDIR /usr/maui/tools/
LOGDIR /var/log/maui/
STATDIR /usr/maui/stats/
#LOCKFILE /usr/maui/maui.pid
SERVERCONFIGFILE /usr/maui/maui.cfg
CHECKPOINTFILE /var/log/maui/maui.ck
# Misc configs
ENABLEMULTINODEJOBS TRUE
JOBMAXOVERRUN 00:01:00
#SYSTEMDEFAULTJOBWALLTIME 1:00:00:00
USEMACHINESPEED ON
#PREEMPTPOLICY CHECKPOINT
PREEMPTPOLICY SUSPEND
CREDWEIGHT 1
CLASSWEIGHT 1
QOSWEIGHT 1
RESCTLPOLICY ANY
# Job Priority: http://clusterresources.com/mauidocs/5.1jobprioritization.html
QUEUETIMEWEIGHT 1
# FairShare: http://clusterresources.com/mauidocs/6.3fairshare.html
FSPOLICY DEDICATEDPS
FSDEPTH 7
FSINTERVAL 86400
FSDECAY 0.80
# Throttling Policies: http://clusterresources.com/mauidocs/6.2throttlingpolicies.html
# NONE SPECIFIED
# Backfill: http://clusterresources.com/mauidocs/8.2backfill.html
BACKFILLPOLICY BESTFIT
RESERVATIONPOLICY CURRENTHIGHEST
#RESERVATIONPOLICY NEVER
RESERVATIONDEPTH 50
RESDEPTH 32
# Node Allocation: http://clusterresources.com/mauidocs/5.2nodeallocation.html
NODEACCESSPOLICY SHARED
#NODEALLOCATIONPOLICY MINRESOURCE
#NODEALLOCATIONPOLICY MAXBALANCE
NODEALLOCATIONPOLICY FASTEST
#NODEAVAILABILITYPOLICY UTILIZED
NODEAVAILABILITYPOLICY COMBINED
NODEMAXLOAD 1.0
NODELOADPOLICY ADJUSTSTATE
# QOS: http://clusterresources.com/mauidocs/7.3qos.html
QOSCFG[qm] PRIORITY=100 QFLAGS=PREEMPTEE
QOSCFG[md] PRIORITY=100 QFLAGS=PREEMPTEE
QOSCFG[faculty] PRIORITY=1000 QFLAGS=PREEMPTOR
QOSFEATURES[qm] hamilton g03
QOSFEATURES[md] hamilton
# Standing Reservations: http://clusterresources.com/mauidocs/7.1.3standingreservations.html
# SRSTARTTIME[test] 8:00:00
# SRENDTIME[test] 17:00:00
# SRDAYS[test] MON TUE WED THU FRI
# SRTASKCOUNT[test] 20
# SRMAXTIME[test] 0:30:00
# Creds: http://clusterresources.com/mauidocs/6.1fairnessoverview.html
# USERCFG[DEFAULT] FSTARGET=25.0
# USERCFG[john] PRIORITY=100 FSTARGET=10.0-
# GROUPCFG[staff] PRIORITY=1000 QLIST=hi:low QDEF=hi
#
# Groups
#
GROUPCFG[faculty] PRIORITY=1000 QLIST=faculty QDEF=faculty
GROUPCFG[hamilton] PRIORITY=10
GROUPCFG[users] PRIORITY=10
#
# Classes (queues)
#
#CLASSCFG[main] QLIST=md:qm
CLASSCFG[main] QLIST=md:qm:mercury MAXPROC=32,64 MAXMEM=32768,65536
CLASSCFG[hamilton] QLIST=md:qm
torque config
-------------------
[root@ maui]# qmgr
Max open servers: 4
Qmgr: print server
#
# Create queues and set their attributes.
#
#
# Create and define queue main
#
create queue main
set queue main queue_type = Execution
set queue main Priority = 100
set queue main resources_default.neednodes = main
set queue main resources_default.walltime = 24:00:00
set queue main enabled = True
set queue main started = True
#
# Create and define queue hamilton
#
create queue hamilton
set queue hamilton queue_type = Execution
set queue hamilton resources_default.neednodes = hamilton
set queue hamilton resources_default.walltime = 24:00:00
set queue hamilton enabled = True
set queue hamilton started = True
#
# Set server attributes.
#
set server scheduling = True
set server default_queue = main
set server log_events = 511
set server mail_from = adm
set server query_other_jobs = True
set server resources_default.ncpus = 1
set server resources_default.walltime = 24:00:00
set server scheduler_iteration = 60
set server node_check_rate = 150
set server tcp_timeout = 6
set server job_nanny = True
_______________________________________________
mauiusers mailing list
mauiusers@supercluster.org
http://www.supercluster.org/mailman/listinfo/mauiusers