This repository was archived by the owner on Mar 21, 2024. It is now read-only.
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -32,6 +32,7 @@ any large models anymore because data loaders ran out of memory.

### Added

- ([#488](https://github.com/microsoft/InnerEye-DeepLearning/pull/488)) Better handling of missing seriesId in segmentation cross validation reports.
- ([#454](https://github.com/microsoft/InnerEye-DeepLearning/pull/454)) Checking that labels are mutually exclusive.
- ([#447](https://github.com/microsoft/InnerEye-DeepLearning/pull/447/)) Added a sanity check to ensure there are no
  missing channels or missing files. If missing channels in the csv file or filenames associated with channels are
3 changes: 3 additions & 0 deletions InnerEye/ML/visualizers/plot_cross_validation.py
@@ -552,6 +552,9 @@ def convert_rows_for_comparisons(split_column_value: Optional[str],
    :return: augmented subset of the rows in df, as described
    """
    pre_len = len(df)
    # If series id is not present, add a default value
    if CSV_SERIES_HEADER not in dataset_df.columns:
        dataset_df[CSV_SERIES_HEADER] = ''
    # We need the institution column to compare subjects across institutions; if it is not present, we add a default
    # value
    if CSV_INSTITUTION_HEADER not in dataset_df.columns:
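The fix above follows a simple pandas pattern: when an optional column is absent from the loaded dataset, it is added with an empty-string default so that downstream grouping and comparisons can index it unconditionally. A minimal standalone sketch of that pattern, using hypothetical column names rather than the InnerEye CSV header constants:

```python
import pandas as pd

# Hypothetical dataset that lacks a "seriesId" column.
dataset_df = pd.DataFrame({"subject": [1, 2], "institutionId": ["A", "B"]})

# Add any missing optional column with an empty-string default,
# so later code can reference it unconditionally.
for optional_column in ("seriesId", "institutionId"):
    if optional_column not in dataset_df.columns:
        dataset_df[optional_column] = ''

assert list(dataset_df["seriesId"]) == ['', '']
```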
60 changes: 58 additions & 2 deletions Tests/ML/visualizers/test_plot_cross_validation.py
@@ -3,7 +3,8 @@
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
import shutil
from typing import Callable, Dict, List, Optional, Set, Tuple

import pandas as pd
import pytest
@@ -103,17 +104,72 @@ def create_file_list_for_segmentation_recovery_run(test_config_ensemble: PlotCro
folder="main_1570466706163110")


def copy_run_result_files(files: List[RunResultFiles], src_prefix_path: Path,
                          dst_prefix_path: Path, transformer: Callable) -> List[RunResultFiles]:
    """
    Copy dataset_csv_files from a list of RunResultFiles to a working directory, and then
    transform them using a callback.

    :param files: List of RunResultFiles to copy.
    :param src_prefix_path: Shared prefix path for the dataset_csv_files to be removed.
    :param dst_prefix_path: Shared prefix path to use for the copied dataset_csv_files.
    :param transformer: Callback function to apply to the copied dataset_csv_files.
    :return: New list of RunResultFiles pointing at the copied files.
    """
    file_copies = []
    files_copied = []

    for file in files:
        if not file.dataset_csv_file:
            dataset_csv_file: Optional[Path] = None
        else:
            # Replace prefix path
            dst_dataset_csv_file = dst_prefix_path / file.dataset_csv_file.relative_to(src_prefix_path)
            if dst_dataset_csv_file not in files_copied:
                dst_dataset_csv_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy(file.dataset_csv_file, dst_dataset_csv_file)
                files_copied.append(dst_dataset_csv_file)
                transformer(dst_dataset_csv_file)
            dataset_csv_file = dst_dataset_csv_file

        file_copy = RunResultFiles(execution_mode=file.execution_mode,
                                   metrics_file=file.metrics_file,
                                   dataset_csv_file=dataset_csv_file,
                                   run_recovery_id=file.run_recovery_id,
                                   split_index=file.split_index)
        file_copies.append(file_copy)

    return file_copies
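
The helper's path handling relies on a standard pathlib idiom: `relative_to` strips the shared source prefix, so a single join re-roots each file under the destination folder. A minimal sketch with hypothetical paths standing in for `full_ml_test_data_path()` and the test output directory:

```python
from pathlib import Path

# Hypothetical source and destination prefixes.
src_prefix = Path("/data/ml_test_data")
dst_prefix = Path("/tmp/test_outputs")

src_file = src_prefix / "run_0" / "dataset.csv"

# relative_to() removes the shared prefix; joining onto the destination re-roots the file.
dst_file = dst_prefix / src_file.relative_to(src_prefix)
assert dst_file == Path("/tmp/test_outputs/run_0/dataset.csv")
```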


@pytest.mark.after_training_ensemble_run
def test_metrics_preparation_for_segmentation(test_config: PlotCrossValidationConfig) -> None:
@pytest.mark.parametrize("drop_column", [None, CSV_INSTITUTION_HEADER, CSV_SERIES_HEADER])
def test_metrics_preparation_for_segmentation(drop_column: Optional[str],
                                              test_config: PlotCrossValidationConfig,
                                              test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if metrics dataframes can be loaded and prepared. The files in question are checked in, but
    were downloaded from the run whose ID is given in DEFAULT_ENSEMBLE_RUN_RECOVERY_ID.
    Additionally test that CSV_INSTITUTION_HEADER or CSV_SERIES_HEADER can be dropped from the dataset_csv_file.
    """
    files = create_file_list_for_segmentation_recovery_run(test_config)
    if drop_column:
        def drop_csv_column(path: Path) -> None:
            """
            Load a csv file, drop a column, and save the csv file.
            :param path: Path to the csv file.
            """
            df = pd.read_csv(path)
            dropped_df = df.drop(drop_column, axis=1)
            dropped_df.to_csv(path)
        files = copy_run_result_files(files, full_ml_test_data_path(), test_output_dirs.root_dir, drop_csv_column)
    downloaded_metrics = load_dataframes(files, test_config)
    assert test_config.run_recovery_id
    for mode in test_config.execution_modes_to_download():
        expected_df = _get_metrics_df(test_config.run_recovery_id, mode)
        if drop_column:
            # A dropped column is re-created with an empty-string default, so reflect that in the expected dataframe.
            expected_df[drop_column] = ''
        # Drop the "mode" column, because that was added after creating the test data
        metrics = downloaded_metrics[mode]
        assert metrics is not None
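For context, `drop_csv_column` relies on `DataFrame.drop` returning a new frame without the named column, and the parametrized test then expects the loading code to re-create that column with an empty-string default. A minimal sketch of the drop step alone, using hypothetical CSV contents:

```python
import pandas as pd
from io import StringIO

# Hypothetical dataset.csv contents containing a seriesId column.
csv_text = "subject,seriesId,institutionId\n1,1.22.333.1,A\n2,1.22.333.2,B\n"
df = pd.read_csv(StringIO(csv_text))

# drop() returns a copy without the column; the original frame is unchanged.
dropped = df.drop("seriesId", axis=1)
assert "seriesId" not in dropped.columns
assert "seriesId" in df.columns
```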