-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #422 from pneerincx/feature/slurm_qos_interactive
Feature: Slurm QoS interactive
- Loading branch information
Showing
7 changed files
with
150 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,46 @@ | ||
#!/bin/bash | ||
|
||
# | ||
# Make sure we are successful in making tmp dirs in /local. | ||
# When this failed the job should not continue as SLURM will default to /tmp, | ||
# which is not suitable for heavy random IO nor large data sets. | ||
# Hammering /tmp may effectively result in the node going down. | ||
# When the prolog fails the node will be set to state=DRAIN instead. | ||
# | ||
|
||
if [ -z "${SLURM_JOB_ID}" ]; then | ||
if [[ -z "${SLURM_JOB_ID}" ]]; then | ||
logger -s "FATAL: SLURM_JOB_ID is empty or unset in SLURM prolog." | ||
exit 1 | ||
elif [[ -z "${SLURM_JOB_QOS}" ]]; then | ||
logger -s "FATAL: SLURM_JOB_QOS is empty or unset in SLURM prolog." | ||
exit 1 | ||
#else | ||
# logger -s "DEBUG: Found SLURM_JOB_ID ${SLURM_JOB_ID} in SLURM prolog." | ||
# logger -s "DEBUG: Found SLURM_JOB_ID ${SLURM_JOB_ID} and SLURM_JOB_QOS ${SLURM_JOB_QOS} in SLURM prolog." | ||
fi | ||
|
||
set -e | ||
set -u | ||
|
||
LOCAL_SCRATCH_DIR='/local' | ||
# | ||
# Check if local scratch dir is mountpoint and hence not a dir on the system disk. | ||
# | ||
LOCAL_SCRATCH_DIR='/local' | ||
if [ $(stat -c '%d' "${LOCAL_SCRATCH_DIR}") -eq $(stat -c '%d' "${LOCAL_SCRATCH_DIR}/..") ]; then | ||
logger -s "FATAL: local scratch disk (${LOCAL_SCRATCH_DIR}) is not mounted." | ||
exit 1 | ||
#else | ||
# logger -s "DEBUG: local scratch disk (${LOCAL_SCRATCH_DIR}) is mounted." | ||
if [[ $(stat -c '%d' "${LOCAL_SCRATCH_DIR}") -eq $(stat -c '%d' "${LOCAL_SCRATCH_DIR}/..") ]]; then | ||
if [[ "${SLURM_JOB_QOS}" =~ ^ds.* ]]; then | ||
# | ||
# For the data staging QoS "ds", which executes jobs only on the UI, | ||
# a dedicated tmp dir per job may be absent as not all UIs have a /local mount. | ||
# | ||
logger -s "WARN: local scratch disk (${LOCAL_SCRATCH_DIR}) is not mounted." | ||
else | ||
# | ||
# Make sure we can create tmp dirs in /local on compute nodes. | ||
# When this fails the job must not continue as SLURM will default to /tmp, | ||
# which is not suitable for heavy random IO nor large data sets. | ||
# Hammering /tmp may effectively result in the node going down. | ||
# When the prolog fails the node will be set to state=DRAIN instead. | ||
# | ||
logger -s "FATAL: local scratch disk (${LOCAL_SCRATCH_DIR}) is not mounted." | ||
exit 1 | ||
fi | ||
else | ||
# | ||
# Create dedicated tmp dir for this job. | ||
# | ||
TMPDIR="${LOCAL_SCRATCH_DIR}/${SLURM_JOB_ID}/" | ||
#logger -s "DEBUG: local scratch disk (${LOCAL_SCRATCH_DIR}) is mounted. Trying to create ${TMPDIR} ..." | ||
mkdir -m 700 -p "${TMPDIR}" || logger -s "FATAL: failed to create ${TMPDIR}." | ||
chown "${SLURM_JOB_USER}" "${TMPDIR}" || logger -s "FATAL: failed to chown ${TMPDIR}." | ||
fi | ||
|
||
TMPDIR="${LOCAL_SCRATCH_DIR}/${SLURM_JOB_ID}/" | ||
mkdir -m 700 -p "${TMPDIR}" || logger -s "FATAL: failed to create ${TMPDIR}." | ||
chown "${SLURM_JOB_USER}" "${TMPDIR}" || logger -s "FATAL: failed to chown ${TMPDIR}." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,44 @@ | ||
#!/bin/bash | ||
|
||
# | ||
# Make sure we have a tmp dir in /local. | ||
# When this failed the job should not continue as SLURM will default to /tmp, | ||
# which is not suitable for heavy random IO nor large data sets. | ||
# Hammering /tmp may effectively result in the node going down. | ||
# When the prolog fails the node will be set to state=DRAIN instead. | ||
# | ||
|
||
if [ -z "${SLURM_JOB_ID}" ]; then | ||
logger -s "FATAL: SLURM_JOB_ID is empty or unset in SLURM task prolog." | ||
exit 1 | ||
elif [[ -z "${SLURM_JOB_QOS}" ]]; then | ||
logger -s "FATAL: SLURM_JOB_QOS is empty or unset in SLURM task prolog." | ||
exit 1 | ||
fi | ||
|
||
set -e | ||
set -u | ||
|
||
# | ||
# Make sure we have a tmp dir in /local on compute nodes. | ||
# When this fails the job must not continue as SLURM will default to /tmp, | ||
# which is not suitable for heavy random IO nor large data sets. | ||
# Hammering /tmp may effectively result in the node going down. | ||
# When the prolog fails the node will be set to state=DRAIN instead. | ||
# | ||
# For the data staging QoS "ds", which executes jobs only on the UI, | ||
# a dedicated tmp dir per job may be absent as not all UIs have a /local mount. | ||
# | ||
TMPDIR="/local/${SLURM_JOB_ID}/" | ||
|
||
if [ ! -d "${TMPDIR}" ]; then | ||
if [[ ! -d "${TMPDIR}" ]] && [[ ! "${SLURM_JOB_QOS}" =~ ^ds.* ]]; then | ||
logger -s "FATAL: TMPDIR ${TMPDIR} is not available in SLURM task prolog." | ||
exit 1 | ||
else | ||
# | ||
# STDOUT from this task prolog is used to initialize the job task's env, | ||
# so we need to print the export statements to STDOUT. | ||
# | ||
echo "export TMPDIR=${TMPDIR}" | ||
fi | ||
|
||
# | ||
# STDOUT from this task prolog is used to initialize the job task's env, | ||
# so we need to print the export statements to STDOUT. | ||
# Set TMOUT to configure automagic logout from interactive sessions | ||
# after 30 minutes of inactivity. | ||
# | ||
echo "export TMPDIR=${TMPDIR}" | ||
if [[ "${SLURM_JOB_QOS}" =~ ^interactive.* ]]; then | ||
echo "TMOUT=1800" | ||
echo "readonly TMOUT" | ||
echo "export TMOUT" | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.