-
Notifications
You must be signed in to change notification settings - Fork 35
/
condor-runExperiments.submit
49 lines (34 loc) · 1.29 KB
/
condor-runExperiments.submit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
##################################
####### Run the Algorithm ##########
##################################
# Submit description for the experiment runs: every queued job executes
# condor-runExperiments.sh once.
#
# Macros used by this file but not defined in it (they have to be supplied
# at submit time -- presumably by a DAG file; confirm against the caller):
#   $(directory)  experiment directory; passed to the script and used as the
#                 location of the log, output and error files
#   $(numRuns)    number of jobs to queue
#
# Condor universe
universe = vanilla
# Script to run
executable = condor-runExperiments.sh
# Script parameters: the experiment directory and this job's $(Process) value
arguments = $(directory) $(Process)
# No file transfer: in our system, all the machines share the experiment
# directories over NFS
should_transfer_files = no
# Machine matching: Linux hosts only, on either the X86_64 or the INTEL
# architecture, whose Memory attribute is at least 400
requirements = (OpSys == "Linux") && ((Arch == "X86_64") || (Arch == "INTEL")) && (Memory >= 400)
# Among the matching machines, prefer the ones with more memory
rank = Memory
# Log file.
# When run through a DAG there is a single, shared log file: the path below
# does not depend on $(Process).
log = ./$(directory)/condor-runExperiments.log
# Console output file, one per job (the name includes $(Process))
output = ./$(directory)/condor-runExperiments-$(Process).out
# Error file, one per job (the name includes $(Process))
error = ./$(directory)/condor-runExperiments-$(Process).err
# Email notification: only when a job ends in error
notification = Error
# NOTE(review): the address below looks like a redacted placeholder rather
# than a real mailbox -- replace it with a valid address before submitting.
notify_user = [email protected]
# Job priority
priority = 0
# Get all environment variables from the submitting environment
getenv = True
# Send the job to the Held state on failure, i.e. when it was terminated by
# a signal or finished with a non-zero exit code.
on_exit_hold = (ExitBySignal == True) || (ExitCode != 0)
# Retry policy: release the job again once it has spent more than 10 seconds
# in its current state, as long as it has been started fewer than 25 times.
# NOTE(review): the expression does not look at why the job is held, so it
# presumably also releases jobs that were put on hold manually -- confirm
# that this is intended.
periodic_release = (NumJobStarts < 25) && ((CurrentTime - EnteredCurrentStatus) > 10)
# Queue $(numRuns) jobs; $(Process) takes a different value in each of them
queue $(numRuns)