Hello All,

OK, so yesterday we upgraded to SLURM 14.11.0. Everything went rather smoothly, except for one thing: accounting of Raw Usage does not seem to be working properly.

It works partially: yesterday it seemed to be working for all users, but at the moment Raw Usage is updated for only one user, while all the others stay at zero (after resetting Raw Usage with sacctmgr).
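
For reference, the reset and the check were done roughly like this (the user name is just a placeholder):

    sacctmgr modify user where name=someuser set RawUsage=0
    sshare -a

After the reset, sshare shows Raw Usage climbing again for that single user only; for everyone else it stays at zero.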

It seems like an odd problem: SLURM evidently does succeed in updating Raw Usage, just not for all users. I cannot find anything special in the logs.
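
(For the record, I've been searching the logs with something along the lines of

    grep -i -e usage -e priority /var/log/slurmctld /var/log/slurmdbd

with both daemons at debug level 7, but nothing relevant stands out.)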

Are there perhaps any special settings one should take care of after upgrading across so many versions? The config files (slurm.conf, slurmdbd.conf) are still the same as for version 2.2.7; I've attached them as .txt files to this message.


All help appreciated. Cheers,
    Mikael J.
    http://www.iki.fi/~mpjohans/


# Example slurm.conf file. Please run configurator.html
# (in doc/html) to build a configuration file customized
# for your environment.
#
#
# slurm.conf file generated by configurator.html.
#
# See the slurm.conf man page for more information.
#
ClusterName=SLURM_CLUSTER
SlurmUser=slurm
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
StateSaveLocation=/cm/shared/apps/slurm/current/cm/statesave
SlurmdSpoolDir=/cm/local/apps/slurm/14.11.0/spool
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
ProctrackType=proctrack/pgid
CacheGroups=0
ReturnToService=2
PrologSlurmctld=/cm/local/apps/cmd/scripts/prolog

# TIMERS
SlurmctldTimeout=10
SlurmdTimeout=10
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0

# SCHEDULING
SchedulerParameters=default_queue_depth=800,partition_job_depth=200,bf_continue,bf_window=2880,bf_resolution=3600,bf_max_job_test=50000,bf_max_job_part=50000,bf_max_job_user=1,bf_max_job_start=200,max_rpc_cnt=8

FastSchedule=0
SelectType=select/cons_res
SelectTypeParameters=CR_CPU
DefMemPerCPU=0
MaxMemPerNode=64000
PriorityType=priority/multifactor
PriorityCalcPeriod=5
PriorityDecayHalfLife=2-00:00:00
#PriorityUsageResetPeriod=NOW
PriorityWeightFairshare=20000
PriorityWeightAge=500
PriorityMaxAge=4-00:00:00
PriorityWeightPartition=0000
PriorityWeightJobSize=0000
PriorityFavorSmall=YES
PriorityWeightQOS=0000
PreemptType=preempt/partition_prio
PreemptMode=requeue

# LOGGING
SlurmctldDebug=7
SlurmctldLogFile=/var/log/slurmctld
SlurmdDebug=7
SlurmdLogFile=/var/log/slurmd
JobCompType=jobcomp/none

# ACCOUNTING
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30

AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=localhost
AccountingStorageLoc=slurm_acct_db
#AccountingStoragePass=SLURMDBD_USERPASS
AccountingStorageUser=slurm
AccountingStorageEnforce=qos

# PARTITIONS
PartitionName=backfill Nodes=node[001-026] Default=NO  MaxNodes=10 MaxTime=168:00:00 AllowGroups=ALL Priority=1 DisableRootJobs=NO RootOnly=NO Hidden=NO Shared=NO PreemptMode=requeue
PartitionName=medium   Nodes=node[009-026] Default=NO  MaxNodes=4  MaxTime=168:00:00 AllowGroups=ALL Priority=2 DisableRootJobs=NO RootOnly=NO Hidden=NO Shared=NO PreemptMode=off
PartitionName=long     Nodes=node[001-004] Default=NO  MaxNodes=1  MaxTime=744:00:00 AllowGroups=ALL Priority=2 DisableRootJobs=NO RootOnly=NO Hidden=NO Shared=NO PreemptMode=off
PartitionName=short    Nodes=node[005-026] Default=YES MaxNodes=6  MaxTime=002:00:00 AllowGroups=ALL Priority=2 DisableRootJobs=NO RootOnly=NO Hidden=NO Shared=NO PreemptMode=off

SchedulerType=sched/builtin

# Master nodes
ControlMachine=shark
ControlAddr=shark

# Nodes
# A lower weight means the node will be utilised preferentially
NodeName=node001 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=26 State=UNKNOWN
NodeName=node002 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=25 State=UNKNOWN
NodeName=node003 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=24 State=UNKNOWN
NodeName=node004 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=23 State=UNKNOWN
NodeName=node005 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=1  State=UNKNOWN
NodeName=node006 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=2  State=UNKNOWN
NodeName=node007 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=3  State=UNKNOWN
NodeName=node008 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=4  State=UNKNOWN
NodeName=node009 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=5  State=UNKNOWN
NodeName=node010 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=6  State=UNKNOWN
NodeName=node011 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=7  State=UNKNOWN
NodeName=node012 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=8  State=UNKNOWN
NodeName=node013 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=9  State=UNKNOWN
NodeName=node014 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=10 State=UNKNOWN
NodeName=node015 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=11 State=UNKNOWN
NodeName=node016 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=12 State=UNKNOWN
NodeName=node017 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=13 State=UNKNOWN
NodeName=node018 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=14 State=UNKNOWN
NodeName=node019 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=15 State=UNKNOWN
NodeName=node020 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=16 State=UNKNOWN
NodeName=node021 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=17 State=UNKNOWN
NodeName=node022 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=18 State=UNKNOWN
NodeName=node023 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=19 State=UNKNOWN
NodeName=node024 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=20 State=UNKNOWN
NodeName=node025 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=21 State=UNKNOWN
NodeName=node026 Sockets=4 CoresPerSocket=6 ThreadsPerCore=2 Weight=22 State=UNKNOWN

#
# slurmdbd.conf file.
#
# See the slurmdbd.conf man page for more information.
#
# Authentication info
AuthType=auth/munge
#
# slurmDBD info
DbdAddr=DBD_ADDR
SlurmUser=slurm
DebugLevel=7
LogFile=/var/log/slurmdbd
PidFile=/var/run/slurmdbd.pid

# Database info
StorageType=accounting_storage/mysql
StorageHost=localhost
StoragePass=xxxxxxxxxxxxxxxxxxxxx
StorageUser=slurm
StorageLoc=slurm_acct_db
# BEGIN AUTOGENERATED SECTION -- DO NOT REMOVE
DbdHost=shark
# END AUTOGENERATED SECTION   -- DO NOT REMOVE
