#
# 2. Alter the schedule as desired.
#
# *(figure: an example altered version of the generated finetuning schedule)*
#
# 3. Once the finetuning schedule has been altered, pass it to
# [FinetuningScheduler](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.fts.html#finetuning_scheduler.fts.FinetuningScheduler) to commence scheduled training:
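#
# A minimal sketch of this step (assuming an illustrative schedule filename `"RteBoolqModule_ft_schedule.yaml"` and
# placeholder `lightning_module`/`datamodule` objects; depending on your Lightning version the import may be
# `lightning.pytorch` rather than `pytorch_lightning`):
#
# ```python
# from pytorch_lightning import Trainer
# from finetuning_scheduler import FinetuningScheduler
#
# # Point the callback at the (optionally altered) schedule generated above.
# fts = FinetuningScheduler(ft_schedule="RteBoolqModule_ft_schedule.yaml")
#
# trainer = Trainer(callbacks=[fts])
# trainer.fit(lightning_module, datamodule=datamodule)
# ```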
#
# **Tip:** Regular expressions can be convenient for specifying more complex schedules. A per-phase base maximum lr can also be specified:
#
# *(figure: a schedule excerpt using regex parameter patterns and per-phase lr values)*
#
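# As an illustrative sketch of such a schedule (the parameter names, regex pattern, and values below are invented for
# demonstration, and the per-phase keys `params`, `lr`, and `max_transition_epoch` are assumed to follow the schedule
# format described in the FinetuningScheduler docs), the schedule could be defined as a dict and written to YAML:
#
# ```python
# import yaml
#
# ft_schedule = {
#     0: {"params": ["model.classifier.bias", "model.classifier.weight"]},
#     1: {
#         # a regex can thaw a whole group of parameters at once
#         "params": [r"model.deberta.encoder.layer.11..*"],
#         "lr": 1.0e-05,  # per-phase base maximum lr
#         "max_transition_epoch": 3,
#     },
# }
#
# with open("explicit_ft_schedule.yaml", "w") as f:
#     yaml.dump(ft_schedule, f)
# ```
#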
# </div>
#
@@ -645,8 +645,8 @@ def train() -> None:
# produced in the scenarios [here](https://drive.google.com/file/d/1t7myBgcqcZ9ax_IT9QVk-vFH_l_o5UXB/view?usp=sharing)
# (caution, ~3.5GB).
#
# [TensorBoard summary: fts_explicit scenario](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOnRydWUsIm5vZnRzX2Jhc2VsaW5lIjpmYWxzZSwiZnRzX2ltcGxpY2l0IjpmYWxzZX0%3D)
# [TensorBoard summary: nofts_baseline scenario](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOmZhbHNlLCJub2Z0c19iYXNlbGluZSI6dHJ1ZSwiZnRzX2ltcGxpY2l0IjpmYWxzZX0%3D)
#
# Note that the summaries linked above may differ by around 1% from the TensorBoard summaries generated by this
# notebook, which uses DP and a single GPU.
@@ -656,7 +656,7 @@ def train() -> None:
# greater finetuning flexibility for model exploration in research. For example, a glance at DeBERTa-v3's implicit training
# run immediately reveals a critical tuning transition point:
#
# [TensorBoard summary: fts_implicit scenario (val_loss)](https://tensorboard.dev/experiment/n7U8XhrzRbmvVzC4SQSpWw/#scalars&_smoothingWeight=0&runSelectionState=eyJmdHNfZXhwbGljaXQiOmZhbHNlLCJub2Z0c19iYXNlbGluZSI6ZmFsc2UsImZ0c19pbXBsaWNpdCI6dHJ1ZX0%3D)
#
# Our `val_loss` begins a precipitous decline at step 3119, which corresponds to phase 17 in the schedule. Referring to our
# schedule, in phase 17 we begin tuning the attention parameters of our 10th encoder layer (of 11). Interesting!
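#
# To cross-reference a transition like this against the schedule programmatically, one simple option (filename
# illustrative, matching the sketch earlier) is to load the schedule YAML and inspect the phase in question:
#
# ```python
# import yaml
#
# with open("RteBoolqModule_ft_schedule.yaml") as f:
#     schedule = yaml.safe_load(f)
#
# # Show which parameter groups begin training in phase 17.
# print(schedule[17]["params"])
# ```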