#!/bin/bash
# Slurm array job that launches one wav2letter training run per point of a
# hyper-parameter grid (see build_command for the grid definition).

#SBATCH --job-name=torchaudiomodel
#SBATCH --output=/checkpoint/%u/jobs/audio-%A-%a.out
#SBATCH --error=/checkpoint/%u/jobs/audio-%A-%a.err
#SBATCH --signal=USR1@600
#SBATCH --open-mode=append
#SBATCH --time=1200
#SBATCH --nodes=1
#SBATCH --array=1-32
# number of CPUs = 2x (number of data workers + number of GPUs requested)

# How the grid is indexed
# -----------------------
# SLURM_ARRAY_TASK_ID is decoded as a mixed-radix number: every sweep axis
# consumes one "digit" (index % number_of_choices) and passes the quotient on
# to the next axis.  The product of all axis sizes (COUNT) must equal the
# number of array tasks, otherwise the task refuses to start.  With
# --array=1-N and COUNT == N, index N decodes to the same point index 0 would
# (all first choices), so every grid point is still visited exactly once.
# When choices are added or removed, update --array above to match.
#
# Errors are handled explicitly instead of with `set -e`: main returns a
# status and, being the last command, that status is the job's exit status.

CMD=""          # command line under construction (kept as one string because
                # its exact text is hashed into the checkpoint file name)
COUNT=1         # product of the sizes of all axes added so far
sweep_index=0   # part of the task index not yet consumed by an axis
picked=""       # value selected by the most recent pick_choice call

#######################################
# Select one value for the next sweep axis and consume its digit.
# Globals:   sweep_index (read, written), COUNT (written), picked (written)
# Arguments: $@ - candidate values for this axis; at least one is required
# Returns:   0 on success, 1 if no candidates were given
#######################################
pick_choice() {
  local -a choices=("$@")
  local n=${#choices[@]}
  if ((n == 0)); then
    >&2 echo "sweep axis defined without any choices"
    return 1
  fi
  picked=${choices[sweep_index % n]}
  sweep_index=$((sweep_index / n))
  COUNT=$((COUNT * n))
}

#######################################
# Add a "--<name> <value>" axis to the command line.
# Globals:   CMD (written), plus those of pick_choice
# Arguments: $1 - option name without the leading dashes
#            $2.. - candidate values; a value containing spaces becomes
#                   several command-line words when the command is launched
#######################################
sweep_option() {
  local name=$1
  shift
  pick_choice "$@" || return
  CMD="$CMD --$name $picked"
}

#######################################
# Add an axis whose choices are complete argument snippets (for example
# "--normalize", or "" for "pass nothing").
# Globals:   CMD (written), plus those of pick_choice
# Arguments: $@ - candidate snippets
#######################################
sweep_flag() {
  pick_choice "$@" || return
  CMD="$CMD $picked"
}

#######################################
# Build the training command for one array task.
# Globals:   CMD (written), COUNT (written), sweep_index (written)
# Arguments: $1 - array task index (non-negative decimal integer)
# Returns:   0 on success, non-zero if an axis is malformed
#######################################
build_command() {
  sweep_index=$((10#$1)) # force base 10 so a leading zero is not read as octal
  COUNT=1

  CMD="srun"
  CMD="$CMD python /private/home/vincentqb/audio-pytorch/examples/pipeline_wav2letter/main.py"
  # Distributed variants, currently disabled.  The command is split on
  # whitespace and not re-parsed by the shell, so do not add quotes here.
  # CMD="$CMD --distributed --world-size $SLURM_JOB_NUM_NODES --dist-url env:// --dist-backend=nccl"
  # CMD="$CMD --distributed --world-size $SLURM_JOB_NUM_NODES"
  # CMD="$CMD --distributed --world-size 8"
  CMD="$CMD --print-freq 1 --reduce-lr-valid --dataset-root /datasets01/librispeech/ --dataset-folder-in-archive 062419"

  sweep_option dropout 0. 0.2 || return
  # other settings tried: "mel" "mfcc" "waveform" / "waveform"
  sweep_option model-input-type "mfcc" "waveform" || return
  sweep_option dataset-train \
    "train-clean-100" \
    "train-clean-100 train-clean-360 train-other-500" || return
  sweep_option dataset-valid "dev-clean" || return
  sweep_option batch-size 128 || return
  sweep_option learning-rate .6 || return
  # other settings tried: 0. .8
  sweep_option momentum .8 || return
  sweep_option weight-decay .00001 || return
  # other settings tried: 0. .2
  sweep_option clip-grad 0. || return
  # other settings tried: .98 .99
  sweep_option gamma .99 || return
  # other settings tried: 80 160
  sweep_option hop-length 160 || return
  sweep_option hidden-channels 2000 1000 || return
  # other settings tried: 512 400
  sweep_option win-length 400 || return
  # other settings tried: 13 128 40
  sweep_option bins 13 || return

  # other settings tried: ""
  sweep_flag "--normalize" || return
  # other settings tried: "" "--time-mask 70 --freq-mask 7" "--time-mask 35 --freq-mask 5"
  sweep_flag "" || return

  # Masking as two independent axes, currently disabled:
  # sweep_option time-mask 0 35 || return
  # sweep_option freq-mask 0 5 || return

  # other settings tried: "sgd" "adadelta" "adamw"
  sweep_option optimizer "sgd" "adadelta" || return
  # other settings tried: "exponential" "reduceonplateau"
  sweep_option scheduler "reduceonplateau" || return
  # other settings tried: "greedy" "greedyiter" "viterbi"
  sweep_option decoder "greedy" || return
  sweep_option epochs 1000 || return
}

#######################################
# Validate the Slurm array environment, build this task's command, derive its
# checkpoint path and launch it.
# Globals:   SLURM_JOB_ID, SLURM_ARRAY_TASK_ID, SLURM_ARRAY_TASK_COUNT (read);
#            CMD, COUNT (written)
# Outputs:   job id, diagnostics and the final command line on stderr
# Returns:   the launched command's exit status, or 1 if validation fails
#######################################
main() {
  local job_id=${SLURM_JOB_ID:-}
  local task_id=${SLURM_ARRAY_TASK_ID:-}
  local task_count=${SLURM_ARRAY_TASK_COUNT:-}

  >&2 echo "$job_id"

  if ! [[ "$task_id" =~ ^[0-9]+$ ]]; then
    >&2 echo "SLURM_ARRAY_TASK_ID = '$task_id' is not a non-negative integer; submit this script as a Slurm array job"
    return 1
  fi

  build_command "$task_id" || return

  # Fail (non-zero) when the array size and the grid size disagree, so the
  # task is reported as failed instead of silently finishing without training.
  if [[ "${task_count:-0}" -ne $COUNT ]]; then
    >&2 echo "SLURM_ARRAY_TASK_COUNT = $task_count is not equal to $COUNT"
    return 1
  fi

  # The ENV below are only used in distributed training with env:// initialization
  # export MASTER_ADDR=${SLURM_JOB_NODELIST:0:9}${SLURM_JOB_NODELIST:10:4}
  # export MASTER_PORT=29500

  # export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning'

  # The checkpoint name is derived from the command text and the job id.
  local hash
  hash=$(printf '%s\n' "$CMD $job_id" | md5sum | awk '{print $1}')
  if [[ -z "$hash" ]]; then
    >&2 echo "could not compute the checkpoint hash (is md5sum available?)"
    return 1
  fi

  CMD="$CMD --checkpoint /checkpoint/vincentqb/checkpoint/checkpoint-$job_id-$hash.pth.tar"

  # Split the command on whitespace exactly once and run it directly; unlike
  # eval, nothing in it is glob-expanded or parsed as shell syntax.
  local -a launch
  read -ra launch <<<"$CMD"
  >&2 printf '%s\n' "${launch[*]}"
  "${launch[@]}"
}

main "$@"