On Thursday, 22 November 2018 9:26:13 PM AEDT Christoph Brüning wrote:

> Hi Chris,

Hi Christoph!

[...]
> I was wondering if constantly making and deleting XFS projects has a
> considerable impact on performance and stability. So I'd be glad if you
> could share some of your experience with that setup.

It's been pretty transparent to the users, the local disks on the nodes are 
only used for local scratch (the root filesystem is mounted from Lustre with 
some neat hacks to OneSIS and the kernel from our Lustre guru) so there's very 
little competition for the SSDs.

> Also, would you mind providing access to your prolog and epilog scripts?

Attached!

All the best,
Chris
-- 
 Chris Samuel  :  http://www.csamuel.org/  :  Melbourne, VIC
#!/bin/bash
#
# Slurm prolog: create a per-job scratch area on the node-local XFS
# filesystem (/jobfs/local) and cap its size with an XFS project quota
# sized to the job's --tmp request.

# SLURM_RESTART_COUNT is unset on a job's first run; default it to 0 so
# the scratch path is stable across requeues.
if [ "${SLURM_RESTART_COUNT}" == "" ]; then
       SLURM_RESTART_COUNT=0
fi

JOBSCRATCH=/jobfs/local/slurm/${SLURM_JOB_ID}.${SLURM_RESTART_COUNT}

# Create a temporary directory and set an XFS quota on it to match the
# requested --tmp (or 100MB if not set)
if [ -d "${JOBSCRATCH}" ]; then
        # Trace the quota setup into /tmp/quota.log for debugging.
        exec > >(tee "/tmp/quota.log") 2>&1
        set -x
        # MinTmpDiskNode carries the job's --tmp request; scontrol prints
        # it as Key=Value, so take the value after the last '='.
        QUOTA=$(/apps/slurm/latest/bin/scontrol show JobId="${SLURM_JOB_ID}" | grep -E 'MinTmpDiskNode=[0-9]' | awk -F= '{print $NF}')
        # A value of 0 means --tmp was not requested; fall back to 100MB.
        if [ "${QUOTA}" == "0" ]; then
                QUOTA=100M
        fi
        # Tag the scratch directory as XFS project <jobid> (recursively,
        # -s) and set a hard block limit on that project.
        /usr/sbin/xfs_quota -x -c "project -s -p ${JOBSCRATCH} ${SLURM_JOB_ID}" /jobfs/local
        /usr/sbin/xfs_quota -x -c "limit -p bhard=${QUOTA} ${SLURM_JOB_ID}" /jobfs/local

        # Set up a directory to be used as ${JOBFS}, owned like its parent.
        /bin/mkdir "${JOBSCRATCH}/var_tmp/jobfs"
        /bin/chown --reference="${JOBSCRATCH}/var_tmp/" "${JOBSCRATCH}/var_tmp/jobfs" -v
        set +x
else
        echo "$(date): TMPDIR ${JOBSCRATCH} not there" >> /jobfs/local/slurm/slurmdprologfail.txt
fi

# Exit OK so a prolog hiccup doesn't get the node marked down.
exit 0
#!/bin/bash
#
# Slurm epilog: remove the job's scratch directories from node-local
# disk and /dev/shm.

# SLURM_RESTART_COUNT is unset on a job's first run; default it to 0 so
# we reconstruct the same path the prolog created.
if [ "${SLURM_RESTART_COUNT}" == "" ]; then
       SLURM_RESTART_COUNT=0
fi

JOBSCRATCH=/jobfs/local/slurm/${SLURM_JOB_ID}.${SLURM_RESTART_COUNT}
SHMSCRATCH=/dev/shm/slurm/${SLURM_JOB_ID}.${SLURM_RESTART_COUNT}

# Delete the scratch directory for the job (as long as it exists).
# Quote the paths and use '--' so rm can never misparse them.
test -d "${JOBSCRATCH}" && rm -rf -- "${JOBSCRATCH}"
test -d "${SHMSCRATCH}" && rm -rf -- "${SHMSCRATCH}"

# Exit OK here to prevent the node getting marked down.

exit 0

Reply via email to