-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathqscript.template
More file actions
205 lines (204 loc) · 10.4 KB
/
qscript.template
File metadata and controls
205 lines (204 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/bin/bash
#----------------------------------------------------------------------------
# Q U E U E S Y S T E M D I R E C T I V E S
#----------------------------------------------------------------------------
# This file is a template for a batch job script. Every token wrapped in
# percent signs is a placeholder (presumably replaced with a real value by
# whatever renders the template before the job is submitted -- confirm
# against the rendering tool).
#
# Two sets of scheduler directives follow, one in PBS syntax and one in
# SLURM (SBATCH) syntax; bash itself treats all of them as comments.
# Between them they name the job after the job type and scenario, and set
# the wall clock limit, node and per-node task counts, queue or partition,
# account, the job output file (the job type with a .out suffix, in the
# scenario directory) and the user to notify. The SLURM set additionally
# carries a total task count, a reservation, a constraint and a QOS.
# NOTE(review): presumably only the set that matches the target scheduler
# is meant to take effect -- confirm.
#PBS -N %jobtype%.%scenario%
#PBS -l walltime=%walltime%
#PBS -l nodes=%nnodes%:ppn=%ppn%
#PBS -q %queuename%
#PBS -A %account%
#PBS -o %advisdir%/%scenario%/%jobtype%.out
#PBS -V
#PBS -j oe
#PBS -m a
#PBS -M %notifyuser%
#SBATCH --job-name="%jobtype%.%scenario%"
#SBATCH --time=%walltime%
#SBATCH --ntasks-per-node=%ppn%
#SBATCH --ntasks=%totalcpu%
#SBATCH --nodes=%nnodes%
#SBATCH --partition=%queuename%
#SBATCH --reservation=%reservation%
#SBATCH --constraint=%constraint%
#SBATCH --account=%account%
#SBATCH --qos=%qos%
#SBATCH --output=%advisdir%/%scenario%/%jobtype%.out
#SBATCH --mail-type=FAIL,TIME_LIMIT
#SBATCH --mail-user=%notifyuser%
# First executable line: print a separator on the job's standard output.
echo "------------------------------------------------------------------------"
#
#----------------------------------------------------------------------------
# I N I T I A L I Z E D I R E C T O R Y A N D F I L E N A M E S
#----------------------------------------------------------------------------
# Identity of this script and the locations of the three log files it
# reports to (system log, cycle log, scenario log).
THIS=%jobtype%.%queuesyslc%   # how this script names itself in log messages
SCRIPTDIR=%scriptdir%
SYSLOG=%syslog%
# cycle level
CYCLEDIR=%advisdir%
CYCLE=$(basename %advisdir%)
CYCLELOG=${CYCLEDIR}/cycle.log
# scenario level
SCENARIO=%scenario%
SCENARIODIR=${CYCLEDIR}/${SCENARIO}
SCENARIOLOG=${SCENARIODIR}/scenario.log
#
# Change into the scenario directory. Whatever cd writes to stderr is
# piped through timestamp.awk (with level=ERROR) and appended to the
# system, cycle and scenario logs in turn.
cd $SCENARIODIR \
    2> >(awk -v this=$THIS -v level=ERROR -f $SCRIPTDIR/monitoring/timestamp.awk \
         | tee -a $SYSLOG \
         | tee -a $CYCLELOG \
         | tee -a $SCENARIOLOG )
#
#----------------------------------------------------------------------------
# L O G M E S S A G E S T O S T A R T T H E J O B
#----------------------------------------------------------------------------
# Announce the start of the job: the message goes through timestamp.awk
# (with level=INFO) and is appended to the scenario, cycle and system
# logs in turn.
echo "Starting in $SCENARIODIR with %queuesys% Job ID ${%JOBID%}; %queuesys% submit directory ${%JOBDIR%}; and %queuesys% submit host ${%JOBHOST%}." 2>&1 \
    | awk -v this=$THIS -v level=INFO -f $SCRIPTDIR/monitoring/timestamp.awk \
    | tee --append $SCENARIOLOG \
    | tee --append $CYCLELOG \
    | tee --append $SYSLOG
# Describe the execution environment on standard output: the job's node
# information, the host running this script, and the two search paths.
echo "INFO: $THIS: %JOBNODES%: $%JOBNODES%" #QUEUEONLY
echo "INFO: $THIS: hostname: "$(hostname)
for envName in PATH LD_LIBRARY_PATH; do
    echo "INFO: $THIS: $envName : ${!envName}"
done
#
#----------------------------------------------------------------------------
# W R I T E J O B P R O P E R T I E S
#----------------------------------------------------------------------------
#
# Tag the run with a serial number (two random bytes in hex, $RANDOM and
# the current time) for use in disambiguation, indexing, and searching
# results. Job types that start with "prep", and the partmesh job, do not
# get one.
jobtype="%jobtype%"
case $jobtype in
    prep*|partmesh)
        ;;
    *)
        echo "asgs.serialnumber : $(od -An -N2 -tx1 /dev/urandom | tr -d ' \n')-$RANDOM-$(date +%s%N)" >> run.properties
        ;;
esac
# Keep a copy of the ADCIRC build information next to the results and
# record its name in the properties, unless a copy is already here.
if [[ -e "%adcircdir%/../adcirc.bin.buildinfo.json" ]]; then
    if [[ ! -e "adcirc.bin.buildinfo.json" ]]; then
        cp "%adcircdir%/../adcirc.bin.buildinfo.json" . 2>> $SYSLOG
        echo "adcirc.file.metadata.build : adcirc.bin.buildinfo.json" >> run.properties
    fi
fi
# Properties that describe this job on the HPC platform: start time, job
# id, node list, host name, node and task counts, and the job log file.
echo "time.hpc.job.%jobtype%.start : $(date +'%Y-%h-%d-T%H:%M:%S%z')" >> $SCENARIO.run.properties
echo "hpc.job.%jobtype%.jobid : ${%JOBID%}" >> $SCENARIO.run.properties
echo "hpc.job.%jobtype%.nodelist : ( %JOBNODES% )" >> $SCENARIO.run.properties #QUEUEONLY
echo "hpc.job.%jobtype%.hostname : $HOSTNAME" >> $SCENARIO.run.properties #QUEUEONLY
echo "hpc.job.%jobtype%.qnnodes : $%JOBNNODES%" >> $SCENARIO.run.properties #QUEUEONLY
echo "hpc.job.%jobtype%.qntasks-per-node : $%JOBNTASKSPERNODE%" >> $SCENARIO.run.properties #QUEUEONLY
echo "hpc.job.%jobtype%.qntasks : $%JOBNTASKS%" >> $SCENARIO.run.properties #QUEUEONLY
echo "hpc.job.%jobtype%.joblog : %advisdir%/%scenario%/%jobtype%.out" >> $SCENARIO.run.properties #QUEUEONLY
#
#----------------------------------------------------------------------------
# L A Y E R S P E C I F I C A T I O N
#----------------------------------------------------------------------------
#
# The list of layers to run always contains the scenario itself. When the
# 10m wind layer is enabled, "wind10m" is placed in front of it, except
# for the partmesh, prep20 and prep13 job types.
createWind10mLayer="%wind10mlayer%"
declare -a layers=( $SCENARIO )
if [[ $createWind10mLayer == "yes" ]]; then
    case $jobtype in
        partmesh|prep20|prep13)
            ;;
        *)
            layers=( wind10m ${layers[@]} )
            ;;
    esac
fi
#
#----------------------------------------------------------------------------
# L O O P O V E R L A Y E R S
#----------------------------------------------------------------------------
#
# relinkLayerFile TARGET NAME
#   Make NAME a symbolic link to TARGET, first removing whatever is
#   already at NAME, and report the link on standard output. Error
#   messages from rm and ln are appended to $SCENARIOLOG.
relinkLayerFile() {
    local target=$1
    local name=$2
    # -e follows symbolic links and is therefore false for a dangling
    # link; -L is tested as well so that a stale link is removed instead
    # of making the ln -s below fail because the name is already taken
    if [[ -e $name || -L $name ]]; then
        rm "$name" 2>> "$SCENARIOLOG"
    fi
    ln -s "$target" "$name" 2>> "$SCENARIOLOG"
    echo "Symbolically linking target '$target' to name '$name'."
}
#
# For each layer in turn: point fort.15 (and, for jobs other than prep and
# partmesh, the meteorological output file names) at that layer's files,
# run the job command, check the result, archive the logs, and record the
# status in the log, status and properties files.
for layer in "${layers[@]}"; do
    relinkLayerFile "${layer}.fort.15" fort.15
    # if this is not a prep job, untar the corresponding fort.15 files
    # and link to the proper meteorological output files
    if [[ ${jobtype:0:4} != "prep" && $jobtype != "partmesh" ]]; then
        # unpack subdomain fort.15 files for use in generating the layer;
        # the file listing stays on standard output and error messages go
        # to the scenario log (a trailing 2>&1 here would point stderr
        # back at stdout and leave the log without the errors)
        tar xvf "${layer}.fort.15.tar" 2>> "$SCENARIOLOG"
        # link to empty meteorological output files created in prep phase
        for layerFile in "$layer".fort.7* "$layer".maxwvel* "$layer".minpr*; do
            # a pattern that matches nothing is left as literal text
            [[ -e $layerFile || -L $layerFile ]] || continue
            relinkLayerFile "$layerFile" "${layerFile#"$layer".}"
        done
    fi
    #
    # E X E C U T E T H E J O B
    #
    CMD="%cmd%"
    if [[ $layer == "wind10m" || $SCENARIO == *"Wind10m" ]]; then
        # the met only mode should not be run with the SWAN wave coupled executable
        CMD=$(echo "$CMD" | sed 's?adcswan ?adcirc ?')
    fi
    echo "cycle $CYCLE: $SCENARIO: $jobtype.$layer job ${%JOBID%} starting in $SCENARIODIR with the following command: $CMD" 2>&1 \
        | awk -v level=INFO -v this="$THIS" -f "$SCRIPTDIR/monitoring/timestamp.awk" \
        | tee --append "$SCENARIOLOG" \
        | tee --append "$CYCLELOG" \
        | tee --append "$SYSLOG"
    echo "\"start\" : \"$(date +'%Y-%h-%d-T%H:%M:%S%z')\", \"jobid\" : \"${%JOBID%}\"" > "$jobtype.${layer}.run.start" # <-OVERWRITE
    echo "\"jobtype\" : \"$jobtype.$layer\", \"submit\" : null, \"jobid\" : \"${%JOBID%}\", \"start\" : \"$(date +'%Y-%h-%d-T%H:%M:%S%z')\", \"finish\" : null, \"error\" : null" >> jobs.status
    # deliberately unquoted: the command line has to be split into words
    $CMD
    ERROVALUE=$? # capture exit status
    #
    # C H E C K S T A T U S A N D Q U A L I T Y O F R E S U L T S
    #
    ERROMSG=""
    RUNSUFFIX="finish"
    qualityCheck=0
    if [[ $ERROVALUE -eq 0 ]]; then
        if [[ ${jobtype:0:4} != "prep" && $jobtype != "partmesh" ]]; then
            # look for numerical instability errors in the stdout/stderr files
            for file in adcirc.log "$SCENARIOLOG"; do
                if [[ -e $file ]]; then
                    numMsg=$(grep -c WarnElev "$file")
                    if [[ $numMsg -eq 0 ]]; then
                        echo "cycle $CYCLE: $SCENARIO: No numerical instability detected in '$file' after completion of '$jobtype.$layer' job '${%JOBID%}'." 2>&1 \
                            | awk -v level=INFO -v this="$THIS" -f "$SCRIPTDIR/monitoring/timestamp.awk" \
                            | tee --append "$SCENARIOLOG"
                    else
                        ERROVALUE=1
                        ERROMSG="$ERROMSG Detected '$numMsg' numerical instability messages in '$file' after completion of '$jobtype.$layer' job '${%JOBID%}'. "
                        RUNSUFFIX="error"
                    fi
                fi
            done
        fi
    else
        ERROMSG="$ERROMSG The $jobtype.$layer job ended with an exit status that indicates an error occurred. "
        RUNSUFFIX="error"
    fi
    #
    # archive the subdomain fort.16 log files and adcirc.log file
    if [[ ${jobtype:0:4} != "prep" && $jobtype != "partmesh" ]]; then
        tar czf "${layer}.fort.16.tar.gz" ./PE*/fort.16 1> tar.log 2>> "$SCENARIOLOG"
        cp adcirc.log "${layer}.adcirc.log" 2>> "$SCENARIOLOG"
    fi
    #
    echo "cycle $CYCLE: $SCENARIO: $jobtype.$layer job ID '${%JOBID%}' finished in $SCENARIODIR with return value = $ERROVALUE" 2>&1 \
        | awk -v level=INFO -v this="$THIS" -f "$SCRIPTDIR/monitoring/timestamp.awk" \
        | tee --append "$SCENARIOLOG" \
        | tee --append "$CYCLELOG" \
        | tee --append "$SYSLOG"
    #
    # R E P O R T J O B S T A T U S
    #
    if [[ $ERROVALUE -ne 0 || $qualityCheck -ne 0 ]]; then
        echo "cycle $CYCLE: $SCENARIO: $THIS: $ERROMSG" 2>&1 \
            | awk -v this="$THIS" -v level=ERROR -f "$SCRIPTDIR/monitoring/timestamp.awk" \
            | tee --append "$SCENARIOLOG" \
            | tee --append "$CYCLELOG" \
            | tee --append "$SYSLOG"
        echo "\"$RUNSUFFIX\" : \"$(date +'%Y-%h-%d-T%H:%M:%S%z')\", \"message\" : \"$ERROMSG\"" > "$jobtype.${layer}.run.${RUNSUFFIX}" # <-OVERWRITE
        echo "\"jobtype\" : \"$jobtype.$layer\", \"submit\" : null, \"jobid\" : \"${%JOBID%}\", \"start\" : null, \"finish\" : null, \"error\" : \"$(date +'%Y-%h-%d-T%H:%M:%S%z')\"" >> jobs.status
    else
        echo "\"jobtype\" : \"$jobtype.$layer\", \"submit\" : null, \"jobid\" : \"${%JOBID%}\", \"start\" : null, \"finish\" : \"$(date +'%Y-%h-%d-T%H:%M:%S%z')\", \"error\" : null" >> jobs.status
        echo "\"$RUNSUFFIX\" : \"$(date +'%Y-%h-%d-T%H:%M:%S%z')\"" > "$jobtype.${layer}.run.$RUNSUFFIX"
    fi
    echo "time.hpc.job.$jobtype.$layer.${RUNSUFFIX} : $(date +'%Y-%h-%d-T%H:%M:%S%z')" >> "$SCENARIO.run.properties"
    echo "-----------------------------------------------------------------------"
    #
    # if this is a prepall or prep15 job, tar up the subdomain fort.15 files
    # and copy the meteorological output files to their own filenames
    #
    if [[ $jobtype == "prepall" || $jobtype == "prep15" ]]; then
        # as with the extraction above, only error messages go to the log
        tar cvf "${layer}.fort.15.tar" PE*/fort.15 2>> "$SYSLOG"
        rm PE*/fort.15 2>> "$SYSLOG"
        for file in fort.7* maxwvel* minpr*; do
            # a pattern that matches nothing is left as literal text
            [[ -e $file || -L $file ]] || continue
            mv "$file" "${layer}.$file"
        done
    fi
done
#
# finalize by setting the symbolic links for the fort.15 file and
# the meteorology output files to the full output
# (i.e., those that include wind reduction)
#
# -L is tested next to -e throughout because -e is false for a dangling
# symbolic link, which would otherwise be left in place and make the
# following ln -s fail.
if [[ -e "fort.15" || -L "fort.15" ]]; then
    rm fort.15 2>> "$SCENARIOLOG"
fi
ln -s "$SCENARIO.fort.15" fort.15 2>> "$SCENARIOLOG"
echo "Symbolically linking target '$SCENARIO.fort.15' to name 'fort.15'."
#
# clear the generic meteorological output names ...
for file in fort.7* maxwvel* minpr*; do
    # a pattern that matches nothing is left as literal text and is
    # skipped by this test
    if [[ -e $file || -L $file ]]; then
        rm "$file" 2>> "$SCENARIOLOG"
    fi
done
# ... and point them at the scenario's own files
for file in "$SCENARIO".fort.7* "$SCENARIO".maxwvel* "$SCENARIO".minpr*; do
    [[ -e $file || -L $file ]] || continue
    ln -s "$file" "${file#"$SCENARIO".}" 2>> "$SCENARIOLOG"
    echo "Symbolically linking target '$file' to name '${file#"$SCENARIO".}'."
done