Skip to content

Commit 3706d05

Browse files
committed
slurm script.
1 parent 5051f69 commit 3706d05

File tree

1 file changed

+254
-0
lines changed

1 file changed

+254
-0
lines changed
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
#! /bin/bash
2+
3+
#SBATCH --job-name=torchaudiomodel
4+
#SBATCH --output=/checkpoint/%u/jobs/audio-%A-%a.out
5+
#SBATCH --error=/checkpoint/%u/jobs/audio-%A-%a.err
6+
#SBATCH --signal=USR1@600
7+
#SBATCH --open-mode=append
8+
#SBATCH --time=1200
9+
#SBATCH --nodes=1
10+
#SBATCH --array=1-32
11+
# number of CPUs = 2x (number of data workers + number of GPUs requested)
12+
13+
# Record the job id on stderr.
echo "$SLURM_JOB_ID" >&2

# Remaining task index. Each swept option below consumes one mixed-radix
# "digit" of it (i % number_of_choices) and then divides it down.
# Defaults to 0 when SLURM_ARRAY_TASK_ID is unset (script run outside a job
# array) so the arithmetic below stays well-formed and the array-size check
# near the end of the script can report the problem cleanly.
i=${SLURM_ARRAY_TASK_ID:-0}

# Running product of the number of choices of every swept option; compared
# with SLURM_ARRAY_TASK_COUNT once all options have been processed.
COUNT=1

# Command line assembled piece by piece as a flat string.
CMD="srun"
CMD="$CMD python /private/home/vincentqb/audio-pytorch/examples/pipeline_wav2letter/main.py"
# CMD="$CMD --distributed --world-size $SLURM_JOB_NUM_NODES --dist-url 'env://' --dist-backend='nccl'"
# CMD="$CMD --distributed --world-size $SLURM_JOB_NUM_NODES"
# CMD="$CMD --distributed --world-size 8"
CMD="$CMD --print-freq 1 --reduce-lr-valid --dataset-root /datasets01/librispeech/ --dataset-folder-in-archive 062419"
25+
26+
#######################################
# Pick one value of a swept option from the task index and append it to CMD.
#
# The task index is treated as a mixed-radix number: the value used is
# choices[i % n], after which i is divided by n so the next option sees the
# next "digit". COUNT accumulates the product of all n.
#
# Globals:
#   i      (read, written) remaining task index
#   CMD    (written) command line being assembled
#   COUNT  (written) running product of the number of choices
# Arguments:
#   $1   - option name without the leading "--"; pass "" to append the chosen
#          value verbatim (used for bare flags)
#   $2.. - candidate values, at least one
# Returns:
#   0 on success, 1 if no candidate value was given
#######################################
sweep_option() {
  local name=$1
  shift
  if (( $# == 0 )); then
    echo "sweep_option: no choices given for '$name'" >&2
    return 1
  fi

  local -a choices=("$@")
  local n=${#choices[@]}
  local item=${choices[i % n]}
  i=$((i / n))

  if [[ -n "$name" ]]; then
    CMD="$CMD --$name $item"
  else
    # Bare flag (or empty placeholder): appended as-is. The separating space
    # is kept even for an empty value so the CMD string stays byte-identical.
    CMD="$CMD $item"
  fi
  COUNT=$((COUNT * n))
}

# The order of the calls below defines which digit of the task index selects
# which option; reordering them changes the task-id -> configuration mapping.

sweep_option "dropout" 0. 0.2

# sweep_option "model-input-type" "mel" "mfcc" "waveform"
# sweep_option "model-input-type" "waveform"
sweep_option "model-input-type" "mfcc" "waveform"

# A multi-word value becomes several arguments once CMD is re-parsed.
sweep_option "dataset-train" "train-clean-100" "train-clean-100 train-clean-360 train-other-500"

sweep_option "dataset-valid" "dev-clean"

sweep_option "batch-size" 128

sweep_option "learning-rate" .6

# sweep_option "momentum" 0. .8
sweep_option "momentum" .8

sweep_option "weight-decay" .00001

# sweep_option "clip-grad" 0. .2
sweep_option "clip-grad" 0.

# sweep_option "gamma" .98 .99
sweep_option "gamma" .99

# sweep_option "hop-length" 80 160
sweep_option "hop-length" 160

sweep_option "hidden-channels" 2000 1000

# sweep_option "win-length" 512 400
sweep_option "win-length" 400

# sweep_option "bins" 13 128 40
sweep_option "bins" 13

# sweep_option "" ""
sweep_option "" "--normalize"

# sweep_option "" "" "--time-mask 70 --freq-mask 7" "--time-mask 35 --freq-mask 5"
sweep_option "" ""

# sweep_option "time-mask" 0 35
# sweep_option "time-mask" 0
#
# sweep_option "freq-mask" 0 5
# sweep_option "freq-mask" 0

# sweep_option "optimizer" "sgd" "adadelta" "adamw"
sweep_option "optimizer" "sgd" "adadelta"

# sweep_option "scheduler" "exponential" "reduceonplateau"
sweep_option "scheduler" "reduceonplateau"

# sweep_option "decoder" "greedy" "greedyiter" "viterbi"
sweep_option "decoder" "greedy"

sweep_option "epochs" 1000
237+
238+
#######################################
# Abort the task unless the job array has exactly one task per swept
# configuration.
# Globals:
#   SLURM_ARRAY_TASK_COUNT (read) number of tasks in the job array
#   COUNT                  (read) number of configurations generated above
# Outputs:
#   On mismatch, writes a diagnostic to stderr.
# Returns:
#   Does not return on mismatch: exits the script with status 1, so the
#   task is reported as failed. (A bare `exit` would reuse the status of
#   the preceding echo, i.e. 0.)
#######################################
check_task_count() {
  if [[ "$SLURM_ARRAY_TASK_COUNT" -ne "$COUNT" ]]; then
    echo "SLURM_ARRAY_TASK_COUNT = $SLURM_ARRAY_TASK_COUNT is not equal to $COUNT" >&2
    exit 1
  fi
}

check_task_count
242+
243+
# The ENV below are only used in distributed training with env:// initialization
244+
# export MASTER_ADDR=${SLURM_JOB_NODELIST:0:9}${SLURM_JOB_NODELIST:10:4}
245+
# export MASTER_PORT=29500
246+
247+
# export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning'
248+
249+
# Fingerprint of the assembled command line plus the job id, used to give
# each configuration of each job its own checkpoint file name.
HASH=$(printf '%s\n' "$CMD $SLURM_JOB_ID" | md5sum | awk '{print $1}')

CMD="$CMD --checkpoint /checkpoint/vincentqb/checkpoint/checkpoint-$SLURM_JOB_ID-$HASH.pth.tar"

# Log the exact command (quoted, so the logged string matches what was
# hashed above) on stderr before running it.
echo "$CMD" >&2

# CMD is a flat string whose multi-word option values must be split back into
# separate arguments, hence eval rather than a plain expansion. It is built
# from literals in this script plus the Slurm job id, not from user input.
eval "$CMD"

0 commit comments

Comments
 (0)