Skip to content

Commit

Permalink
Added in a WILL_FAIL status for workflows that will fail. (#85)
Browse files Browse the repository at this point in the history
* Now will add a WILL_FAIL status to the status output when a workflow is running but doomed.
* Added new colors to colorized list output.
* Fixes #78
* Removed some extra turtles.
* Added in resources and testing wdl/json files.
  • Loading branch information
jonn-smith authored and lbergelson committed Jul 31, 2019
1 parent 7d466ea commit a400bd5
Show file tree
Hide file tree
Showing 6 changed files with 316 additions and 5 deletions.
51 changes: 46 additions & 5 deletions cromshell
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@ ISINTERACTIVESHELL=true
shopt -s expand_aliases

################################################################################

# ANSI SGR escape sequences, emitted via `echo -e` to colorize output lines.
# Resets all attributes back to the terminal default:
COLOR_NORM='\033[0m'
# Bold + underlined text:
COLOR_UNDERLINED='\033[1;4m'
# Bold white text on a red background:
COLOR_FAILED='\033[1;37;41m'
# Bold red text on a white background:
COLOR_WILL_FAIL='\033[1;31;47m'
# Bold black text on a green background:
COLOR_SUCCEEDED='\033[1;30;42m'
# Black text on a cyan background:
COLOR_RUNNING='\033[0;30;46m'
# Black text on a yellow background:
COLOR_ABORTED='\033[0;30;43m'

# Foreground-only colors (no background); presumably for per-task output -- confirm at use sites.
# Blue text:
TASK_COLOR_RUNNING='\033[0;34m'
# Green text:
TASK_COLOR_SUCCEEDED='\033[0;32m'
# Yellow text:
TASK_COLOR_FAILING='\033[0;33m'
Expand Down Expand Up @@ -732,18 +736,45 @@ function status()
r=$?
[[ $r -eq 0 ]] && retVal=1

# Hold our status string here:
local workflowStatus=$( cat $f | jq -r .status )

if [[ $retVal -eq 1 ]]; then
turtleDead
elif [[ "${workflowStatus}" == "Running" ]] ; then
# OK, status claims this workflow is running fine, but we need to check to see
# if there are any failed sub-processes.
# To do this, we use the `execution-status-count` logic with some filtering:
local tmpExecutionStatusCount=$( makeTemp )
local tmpMetadata=$( makeTemp )

# Get execution status count and filter the metadata down:
curl --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIMEOUT" --compressed -s "${2}/api/workflows/v1/${1}/metadata?$CROMWELL_SLIM_METADATA_PARAMETERS" > ${tmpMetadata}
cat ${tmpMetadata} | jq '.calls | map_values(group_by(.executionStatus) | map({(.[0].executionStatus): . | length}) | add)' > ${tmpExecutionStatusCount}

# Check for failure states:
cat ${tmpMetadata} | jq --exit-status '[ ..|.executionStatus? | values | . == "Failed" ] | any' > /dev/null
r=$?

# Check for failures:
if [[ $r -ne 0 ]] ; then
# We could not find any 'Failed' execution status in our metadata, so our original status is correct.
turtle
else
turtleDead
workflowStatus="WILL_FAIL"
f=${tmpExecutionStatusCount}
fi
else
turtle
fi

# Display status to user:
cat $f | jq .
checkPipeStatus "Could not read tmp file JSON data." "Could not parse JSON output from cromwell server."

# Update ${CROMWELL_SUBMISSIONS_FILE}:
local st=$( cat $f | jq . | grep status | sed -e 's#.*: ##g' | tr -d '",' )
sed -i .bak -e "s#\\(.*${1}.*\\.wdl\\)\\t*.*#\\1$(printf '\t')${st}#g" ${CROMWELL_SUBMISSIONS_FILE}
sed -i .bak -e "s#\\(.*${1}.*\\.wdl\\)\\t*.*#\\1$(printf '\t')${workflowStatus}#g" ${CROMWELL_SUBMISSIONS_FILE}

return $retVal
}
Expand Down Expand Up @@ -1025,11 +1056,21 @@ function list()
r=$?
[ $r -eq 0 ] && echo -e "${COLOR_UNDERLINED}${line}${COLOR_NORM}" && continue

# Check for jobs that WILL FAIL and color those lines:
echo "${line}" | grep -q 'WILL_FAIL'
r=$?
[ $r -eq 0 ] && echo -e "${COLOR_WILL_FAIL}${line}${COLOR_NORM}" && continue

# Check for failed jobs and color those lines:
echo "${line}" | grep -q 'Failed'
r=$?
[ $r -eq 0 ] && echo -e "${COLOR_FAILED}${line}${COLOR_NORM}" && continue

# Check for Aborted jobs and color those lines:
echo "${line}" | grep -q 'Aborted'
r=$?
[ $r -eq 0 ] && echo -e "${COLOR_ABORTED}${line}${COLOR_NORM}" && continue

# Check for successful jobs and color those lines:
echo "${line}" | grep -q 'Succeeded'
r=$?
Expand Down Expand Up @@ -1168,7 +1209,7 @@ function list-outputs()
local id=$1
local cromwellServer=$2

local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )

local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"

Expand Down Expand Up @@ -1199,7 +1240,7 @@ function fetch-logs()
local id=$1
local cromwellServer=$2

local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null| grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )

local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"

Expand Down Expand Up @@ -1233,7 +1274,7 @@ function fetch-all()
local id=$1
local cromwellServer=$2

local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )

local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"

Expand Down
3 changes: 3 additions & 0 deletions resources/options.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"monitoring_script": "gs://broad-dsp-methods-resources/cromwell_monitoring_script.sh"
}
3 changes: 3 additions & 0 deletions testing/helloWorld.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"HelloWorld.docker": "frolvlad/alpine-bash"
}
94 changes: 94 additions & 0 deletions testing/helloWorld.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Hello World!
#
# Description of inputs:
#
# Required:
# String docker - Docker image in which to run
#
# Optional:
# Int mem - Amount of memory (in GB) to give to the machine running each task in this workflow.
# Int preemptible_attempts - Number of times to allow each task in this workflow to be preempted.
# Int disk_space_gb - Amount of storage disk space (in Gb) to give to each machine running each task in this workflow.
# Int cpu - Number of CPU cores to give to each machine running each task in this workflow.
# Int boot_disk_size_gb - Amount of boot disk space (in Gb) to give to each machine running each task in this workflow.
#
workflow HelloWorld {
  # Required: Docker image in which the task is run.
  String docker

  # Optional machine settings; each one is passed straight through to
  # HelloWorldTask, which applies its own default when a value is unset.
  Int? mem
  Int? preemptible_attempts
  Int? disk_space_gb
  Int? cpu
  Int? boot_disk_size_gb

  # Single call: every workflow input is forwarded under the same name.
  call HelloWorldTask {
    input:
      docker               = docker,
      mem                  = mem,
      preemptible_attempts = preemptible_attempts,
      disk_space_gb        = disk_space_gb,
      cpu                  = cpu,
      boot_disk_size_gb    = boot_disk_size_gb
  }

  # This workflow exposes no outputs.
  output {
  }
}

task HelloWorldTask {

  # ------------------------------------------------
  # Prints 'Hello World!' inside the given docker image.
  #
  # Input args:
  #   Required:
  #     String docker - Docker image in which to run.
  #
  #   Runtime Options (all optional):
  #     Int mem                  - Machine memory in GB (converted to MB below).
  #     Int preemptible_attempts - Value for the `preemptible` runtime attribute (0 when unset).
  #     Int disk_space_gb        - Local disk size in GB (default_disk_space_gb when unset).
  #     Int cpu                  - Number of CPU cores (1 when unset).
  #     Int boot_disk_size_gb    - Boot disk size in GB (default_boot_disk_size_gb when unset).
  String docker
  Int? mem
  Int? preemptible_attempts
  Int? disk_space_gb
  Int? cpu
  Int? boot_disk_size_gb

  # ------------------------------------------------
  # Process input args:
  # ------------------------------------------------
  # Get machine settings:
  Boolean use_ssd = false

  # You may have to change the following two parameter values depending on the task requirements
  Int default_ram_mb = 3 * 1024
  # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
  Int default_disk_space_gb = 100

  Int default_boot_disk_size_gb = 15

  # Mem is in units of GB but our command and memory runtime values are in MB
  Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
  # Kept for interface compatibility; the command below does not reference it.
  Int command_mem = machine_mem - 1024

  # ------------------------------------------------
  # Run our command:
  command <<<
    set -e
    echo 'Hello World!'
  >>>

  # ------------------------------------------------
  # Runtime settings:
  runtime {
    docker: docker
    memory: machine_mem + " MB"
    disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
    bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
    # Honor the caller-supplied preemptible_attempts instead of silently
    # ignoring it; fall back to the previous hard-coded value of 0.
    preemptible: select_first([preemptible_attempts, 0])
    cpu: select_first([cpu, 1])
  }

  # ------------------------------------------------
  # Outputs:
  output {
  }
}

3 changes: 3 additions & 0 deletions testing/will_fail.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"WillFailTester.docker": "frolvlad/alpine-bash"
}
167 changes: 167 additions & 0 deletions testing/will_fail.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Will fail tester
# results in a workflow that "will fail" after 5 minutes.
# used to test the "WILL_FAIL" status.
#
# Description of inputs:
#
# Required:
# String docker - Docker image in which to run
#
# Optional:
# Int mem - Amount of memory (in GB) to give to the machine running each task in this workflow.
# Int preemptible_attempts - Number of times to allow each task in this workflow to be preempted.
# Int disk_space_gb - Amount of storage disk space (in Gb) to give to each machine running each task in this workflow.
# Int cpu - Number of CPU cores to give to each machine running each task in this workflow.
# Int boot_disk_size_gb - Amount of boot disk space (in Gb) to give to each machine running each task in this workflow.
#
workflow WillFailTester {
  # Required: Docker image in which both tasks are run.
  String docker

  # Optional machine settings; each one is passed straight through to both
  # tasks, which apply their own defaults when a value is unset.
  Int? mem
  Int? preemptible_attempts
  Int? disk_space_gb
  Int? cpu
  Int? boot_disk_size_gb

  # Task whose command is deliberately invalid.
  call FailFastTask {
    input:
      docker               = docker,
      mem                  = mem,
      preemptible_attempts = preemptible_attempts,
      disk_space_gb        = disk_space_gb,
      cpu                  = cpu,
      boot_disk_size_gb    = boot_disk_size_gb
  }

  # Task that sleeps for 300 seconds; it takes no inputs from FailFastTask.
  call PassRunsLong {
    input:
      docker               = docker,
      mem                  = mem,
      preemptible_attempts = preemptible_attempts,
      disk_space_gb        = disk_space_gb,
      cpu                  = cpu,
      boot_disk_size_gb    = boot_disk_size_gb
  }

  # This workflow exposes no outputs.
  output {
  }
}

task FailFastTask {

  # ------------------------------------------------
  # Runs a deliberately invalid command so that the task fails.
  #
  # Input args:
  #   Required:
  #     String docker - Docker image in which to run.
  #
  #   Runtime Options (all optional):
  #     Int mem                  - Machine memory in GB (converted to MB below).
  #     Int preemptible_attempts - Value for the `preemptible` runtime attribute (0 when unset).
  #     Int disk_space_gb        - Local disk size in GB (default_disk_space_gb when unset).
  #     Int cpu                  - Number of CPU cores (1 when unset).
  #     Int boot_disk_size_gb    - Boot disk size in GB (default_boot_disk_size_gb when unset).
  String docker
  Int? mem
  Int? preemptible_attempts
  Int? disk_space_gb
  Int? cpu
  Int? boot_disk_size_gb

  # ------------------------------------------------
  # Process input args:
  # ------------------------------------------------
  # Get machine settings:
  Boolean use_ssd = false

  # You may have to change the following two parameter values depending on the task requirements
  Int default_ram_mb = 3 * 1024
  # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
  Int default_disk_space_gb = 100

  Int default_boot_disk_size_gb = 15

  # Mem is in units of GB but our command and memory runtime values are in MB
  Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
  # Kept for interface compatibility; the command below does not reference it.
  Int command_mem = machine_mem - 1024

  # ------------------------------------------------
  # Run our command:
  command <<<
    set -e

    # Nonsense here so we will fail fast:
    aojewfajefaiefiapwghaghiogewi;gsaklagdhkashghhkl
  >>>

  # ------------------------------------------------
  # Runtime settings:
  runtime {
    docker: docker
    memory: machine_mem + " MB"
    disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
    bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
    # Honor the caller-supplied preemptible_attempts instead of silently
    # ignoring it; fall back to the previous hard-coded value of 0.
    preemptible: select_first([preemptible_attempts, 0])
    cpu: select_first([cpu, 1])
  }

  # ------------------------------------------------
  # Outputs:
  output {
  }
}

task PassRunsLong {

  # ------------------------------------------------
  # Sleeps for 300 seconds and then exits.
  #
  # Input args:
  #   Required:
  #     String docker - Docker image in which to run.
  #
  #   Runtime Options (all optional):
  #     Int mem                  - Machine memory in GB (converted to MB below).
  #     Int preemptible_attempts - Value for the `preemptible` runtime attribute (0 when unset).
  #     Int disk_space_gb        - Local disk size in GB (default_disk_space_gb when unset).
  #     Int cpu                  - Number of CPU cores (1 when unset).
  #     Int boot_disk_size_gb    - Boot disk size in GB (default_boot_disk_size_gb when unset).
  String docker
  Int? mem
  Int? preemptible_attempts
  Int? disk_space_gb
  Int? cpu
  Int? boot_disk_size_gb

  # ------------------------------------------------
  # Process input args:
  # ------------------------------------------------
  # Get machine settings:
  Boolean use_ssd = false

  # You may have to change the following two parameter values depending on the task requirements
  Int default_ram_mb = 3 * 1024
  # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
  Int default_disk_space_gb = 100

  Int default_boot_disk_size_gb = 15

  # Mem is in units of GB but our command and memory runtime values are in MB
  Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
  # Kept for interface compatibility; the command below does not reference it.
  Int command_mem = machine_mem - 1024

  # ------------------------------------------------
  # Run our command:
  command <<<
    set -e

    # Wait for 5 minutes:
    sleep 300
  >>>

  # ------------------------------------------------
  # Runtime settings:
  runtime {
    docker: docker
    memory: machine_mem + " MB"
    disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
    bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
    # Honor the caller-supplied preemptible_attempts instead of silently
    # ignoring it; fall back to the previous hard-coded value of 0.
    preemptible: select_first([preemptible_attempts, 0])
    cpu: select_first([cpu, 1])
  }

  # ------------------------------------------------
  # Outputs:
  output {
  }
}

0 comments on commit a400bd5

Please sign in to comment.