Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit 4425a4d

Browse files
Use best epoch for model comparison (#495)
* Use best epoch for model comparison
1 parent ef85f08 commit 4425a4d

File tree

11 files changed

+191
-32
lines changed

11 files changed

+191
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ gets uploaded to AzureML, by skipping all test folders.
2323

2424
### Fixed
2525

26+
- ([#495](https://github.com/microsoft/InnerEye-DeepLearning/pull/495)) Fix model comparison to use the best epoch's results.
2627
- ([#482](https://github.com/microsoft/InnerEye-DeepLearning/pull/482)) Check bool parameter is either true or false.
2728
- ([#475](https://github.com/microsoft/InnerEye-DeepLearning/pull/475)) Bug in AML SDK meant that we could not train
2829
any large models anymore because data loaders ran out of memory.

InnerEye/Common/common_util.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
empty_string_to_none = lambda x: None if (x is None or len(x.strip()) == 0) else x
2626
string_to_path = lambda x: None if (x is None or len(x.strip()) == 0) else Path(x)
2727

28-
# File name pattern that will match anything returned by epoch_folder_name.
29-
EPOCH_FOLDER_NAME_PATTERN = "epoch_[0-9][0-9][0-9]"
3028

3129
SUBJECT_METRICS_FILE_NAME = "metrics.csv"
3230
EPOCH_METRICS_FILE_NAME = "epoch_metrics.csv"

InnerEye/ML/baselines_util.py

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@
2020
from InnerEye.Common import common_util
2121
from InnerEye.Common.Statistics import wilcoxon_signed_rank_test
2222
from InnerEye.Common.Statistics.wilcoxon_signed_rank_test import WilcoxonTestConfig
23-
from InnerEye.Common.common_util import BASELINE_WILCOXON_RESULTS_FILE, ENSEMBLE_SPLIT_NAME, \
24-
EPOCH_FOLDER_NAME_PATTERN, FULL_METRICS_DATAFRAME_FILE, ModelProcessing, OTHER_RUNS_SUBDIR_NAME, \
25-
SUBJECT_METRICS_FILE_NAME, remove_file_or_directory
23+
from InnerEye.Common.common_util import BASELINE_WILCOXON_RESULTS_FILE, FULL_METRICS_DATAFRAME_FILE, ModelProcessing, \
24+
SUBJECT_METRICS_FILE_NAME, get_best_epoch_results_path, remove_file_or_directory
2625
from InnerEye.Common.fixed_paths import DEFAULT_AML_UPLOAD_DIR
2726
from InnerEye.ML.common import DATASET_CSV_FILE_NAME, ModelExecutionMode
2827
from InnerEye.ML.config import SegmentationModelBase
@@ -65,27 +64,18 @@ def compare_scores_against_baselines(model_config: SegmentationModelBase, azure_
6564
the Wilcoxon results file.
6665
"""
6766
# The attribute will only be present for a segmentation model, and it might be None or empty even for that.
68-
comparison_blob_storage_paths = getattr(model_config, 'comparison_blob_storage_paths')
67+
comparison_blob_storage_paths = model_config.comparison_blob_storage_paths
6968
if not comparison_blob_storage_paths:
7069
return
71-
outputs_path = model_config.outputs_folder
72-
if model_proc == ModelProcessing.ENSEMBLE_CREATION:
73-
outputs_path = outputs_path / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME
74-
model_epoch_paths = sorted(outputs_path.glob(EPOCH_FOLDER_NAME_PATTERN))
75-
if not model_epoch_paths:
76-
logging.warning("Cannot compare scores against baselines: no matches found for "
77-
f"{outputs_path}/{EPOCH_FOLDER_NAME_PATTERN}")
78-
return
79-
# Use the last (highest-numbered) epoch path for the current run.
80-
model_epoch_path = model_epoch_paths[-1]
81-
model_metrics_path = model_epoch_path / ModelExecutionMode.TEST.value / SUBJECT_METRICS_FILE_NAME
82-
model_dataset_path = model_epoch_path / ModelExecutionMode.TEST.value / DATASET_CSV_FILE_NAME
70+
outputs_path = model_config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TEST, model_proc)
71+
if not outputs_path.is_dir():
72+
raise FileNotFoundError(f"Cannot compare scores against baselines: no best epoch results found at {outputs_path}")
73+
model_metrics_path = outputs_path / SUBJECT_METRICS_FILE_NAME
74+
model_dataset_path = outputs_path / DATASET_CSV_FILE_NAME
8375
if not model_dataset_path.exists():
84-
logging.warning(f"Not comparing with baselines because no {model_dataset_path} file found for this run")
85-
return
76+
raise FileNotFoundError(f"Not comparing with baselines because no {model_dataset_path} file found for this run")
8677
if not model_metrics_path.exists():
87-
logging.warning(f"Not comparing with baselines because no {model_metrics_path} file found for this run")
88-
return
78+
raise FileNotFoundError(f"Not comparing with baselines because no {model_metrics_path} file found for this run")
8979
model_metrics_df = pd.read_csv(model_metrics_path)
9080
model_dataset_df = pd.read_csv(model_dataset_path)
9181
comparison_result = download_and_compare_scores(outputs_path,

InnerEye/ML/configs/segmentation/BasicModel2Epochs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,13 @@
1313

1414
fg_classes = ["spinalcord", "lung_r", "lung_l"]
1515

16+
default_single_comparison_blob = "refs_pull_483_merge_1624269679_90b1d23c/outputs/best_validation_epoch/Test"
17+
1618

1719
class BasicModel2Epochs(SegmentationModelBase):
1820
def __init__(self, **kwargs: Any) -> None:
21+
comparison_blob_storage_paths = kwargs.pop("comparison_blob_storage_paths",
22+
[("Single", default_single_comparison_blob)])
1923
super().__init__(
2024
should_validate=False,
2125
architecture="Basic",
@@ -39,6 +43,7 @@ def __init__(self, **kwargs: Any) -> None:
3943
recovery_checkpoint_save_interval=1,
4044
use_mixed_precision=True,
4145
azure_dataset_id=AZURE_DATASET_ID,
46+
comparison_blob_storage_paths=comparison_blob_storage_paths,
4247
dataset_mountpoint="/tmp/innereye",
4348
# Use an LR scheduler with a pronounced and clearly visible decay, to be able to easily see if that
4449
# is applied correctly in run recovery.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
There were not enough data points for any statistically meaningful comparisons.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
,Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm,seriesId,institutionId,split,mode
2+
0,5,lung_l,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
3+
6,5,lung_r,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
4+
12,5,spinalcord,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
5+
0,5,lung_l,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,Single,Test
6+
6,5,lung_r,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,Single,Test
7+
12,5,spinalcord,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,Single,Test
Lines changed: 3 additions & 0 deletions
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
There were not enough data points for any statistically meaningful comparisons.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
,Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm,seriesId,institutionId,split,mode
2+
0,4,lung_l,0.0,inf,inf,aee46d8ee00ed5a5b5d763957e58b8d8cd27f36b9722ada79937fead949f8581,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
3+
6,4,lung_r,0.0,inf,inf,aee46d8ee00ed5a5b5d763957e58b8d8cd27f36b9722ada79937fead949f8581,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
4+
12,4,spinalcord,0.0,inf,inf,aee46d8ee00ed5a5b5d763957e58b8d8cd27f36b9722ada79937fead949f8581,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
5+
18,5,lung_l,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
6+
24,5,lung_r,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
7+
30,5,spinalcord,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
8+
36,6,lung_l,0.0,inf,inf,dfa85a2f00323404eac4fd784713e6e2dc980dc9e33fd9f5d3cda7b82c7e6ab2,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
9+
42,6,lung_r,0.0,inf,inf,dfa85a2f00323404eac4fd784713e6e2dc980dc9e33fd9f5d3cda7b82c7e6ab2,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
10+
48,6,spinalcord,0.0,inf,inf,dfa85a2f00323404eac4fd784713e6e2dc980dc9e33fd9f5d3cda7b82c7e6ab2,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
11+
54,7,lung_l,0.0,inf,inf,8e1c5884be4d4d6581242e862641b08ecb8b2a13d71b4a708f35a951db12793c,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
12+
60,7,lung_r,0.0,inf,inf,8e1c5884be4d4d6581242e862641b08ecb8b2a13d71b4a708f35a951db12793c,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
13+
66,7,spinalcord,0.0,inf,inf,8e1c5884be4d4d6581242e862641b08ecb8b2a13d71b4a708f35a951db12793c,b7f757fb-12e0-489e-a6da-f64895cdd229,CURRENT,Test
14+
0,5,lung_l,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,Single,Test
15+
6,5,lung_r,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,Single,Test
16+
12,5,spinalcord,0.0,inf,inf,402ba5d42f37357f18f29af17b0846cbca4c430fea5b15a6baad5ec29dd6c9ba,b7f757fb-12e0-489e-a6da-f64895cdd229,Single,Test
Lines changed: 3 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)