From 42072a61772f5ee2f24a84593d04ad81a5c6cb69 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 24 Aug 2021 18:04:11 +0100 Subject: [PATCH 01/23] extending _get_transforms to accept new datasets --- .../SSL/lightning_containers/ssl_container.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index 6ad2bda8c..89c9a039e 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -166,8 +166,8 @@ def create_model(self) -> LightningModule: f"Found {self.ssl_training_type.value}") model.hparams.update({'ssl_type': self.ssl_training_type.value, "num_classes": self.data_module.num_classes}) - self.encoder_output_dim = get_encoder_output_dim(model, self.data_module) + self.encoder_output_dim = get_encoder_output_dim(model, self.data_module) return model def get_data_module(self) -> InnerEyeDataModuleTypes: @@ -210,6 +210,7 @@ def _create_ssl_data_modules(self, is_ssl_encoder_module: bool) -> InnerEyeVisio batch_size=batch_size_per_gpu, num_workers=self.num_workers, seed=self.random_seed) + #import pdb; pdb.set_trace() dm.prepare_data() dm.setup() return dm @@ -232,16 +233,26 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], SSLDatasetName.CheXpert.value, SSLDatasetName.Covid.value]: assert augmentation_config is not None - train_transforms, val_transforms = get_cxr_ssl_transforms(augmentation_config, - return_two_views_per_sample=is_ssl_encoder_module, - use_training_augmentations_for_validation=is_ssl_encoder_module) + train_transforms, val_transforms = get_cxr_ssl_transforms( + augmentation_config, + return_two_views_per_sample=is_ssl_encoder_module, + use_training_augmentations_for_validation=is_ssl_encoder_module + ) elif dataset_name in [SSLDatasetName.CIFAR10.value, SSLDatasetName.CIFAR100.value]: train_transforms = \ InnerEyeCIFARTrainTransform(32) if is_ssl_encoder_module else InnerEyeCIFARLinearHeadTransform(32) val_transforms = \ InnerEyeCIFARTrainTransform(32) if is_ssl_encoder_module else InnerEyeCIFARLinearHeadTransform(32) + elif augmentation_config: + train_transforms, val_transforms = get_cxr_ssl_transforms( + augmentation_config, + return_two_views_per_sample=is_ssl_encoder_module, + use_training_augmentations_for_validation=is_ssl_encoder_module + ) + logging.warning(f"Dataset {dataset_name} unknown. 
The config will be consumed by " + f"get_cxr_ssl_transforms() to create the augmentation pipeline.") else: - raise ValueError(f"Dataset {dataset_name} unknown.") + raise ValueError(f"Dataset {dataset_name} unknown and no config has been passed") return train_transforms, val_transforms From a8ebe13423518d848495c744785bbce3683e2ae4 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 24 Aug 2021 18:17:57 +0100 Subject: [PATCH 02/23] expand get_cxr_ssl_transform to avoid hidden channel expansion --- .../SSL/datamodules_and_datasets/transforms_utils.py | 11 ++++++++--- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 3 ++- InnerEye/ML/augmentations/transform_pipeline.py | 10 ++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py b/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py index e6b28d264..e22000131 100644 --- a/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py +++ b/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py @@ -15,7 +15,8 @@ def get_cxr_ssl_transforms(config: CfgNode, return_two_views_per_sample: bool, - use_training_augmentations_for_validation: bool = False) -> Tuple[Any, Any]: + use_training_augmentations_for_validation: bool = False, + expand_channels: bool = True) -> Tuple[Any, Any]: """ Returns training and validation transforms for CXR. Transformations are constructed in the following way: @@ -33,10 +34,14 @@ def get_cxr_ssl_transforms(config: CfgNode, :param use_training_augmentations_for_validation: If True, use augmentation at validation time too. This is required for SSL validation loss to be meaningful. If False, only apply basic processing step (no augmentations) + :param expand_channels: if True the expand channel transformation from InnerEye.ML.augmentations.image_transforms + will be added to the transformation passed through the config. This is needed for single channel images as CXR. """ - train_transforms = create_cxr_transforms_from_config(config, apply_augmentations=True) + train_transforms = create_cxr_transforms_from_config(config, apply_augmentations=True, + expand_channels=expand_channels) val_transforms = create_cxr_transforms_from_config(config, - apply_augmentations=use_training_augmentations_for_validation) + apply_augmentations=use_training_augmentations_for_validation, + expand_channels=expand_channels) if return_two_views_per_sample: train_transforms = DualViewTransformWrapper(train_transforms) # type: ignore val_transforms = DualViewTransformWrapper(val_transforms) # type: ignore diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index 89c9a039e..daf2fe300 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -247,7 +247,8 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], train_transforms, val_transforms = get_cxr_ssl_transforms( augmentation_config, return_two_views_per_sample=is_ssl_encoder_module, - use_training_augmentations_for_validation=is_ssl_encoder_module + use_training_augmentations_for_validation=is_ssl_encoder_module, + expand_channels=False, ) logging.warning(f"Dataset {dataset_name} unknown. 
The config will be consumed by " f"get_cxr_ssl_transforms() to create the augmentation pipeline.") diff --git a/InnerEye/ML/augmentations/transform_pipeline.py b/InnerEye/ML/augmentations/transform_pipeline.py index cc2d65df9..9687e7f07 100644 --- a/InnerEye/ML/augmentations/transform_pipeline.py +++ b/InnerEye/ML/augmentations/transform_pipeline.py @@ -87,15 +87,21 @@ def __call__(self, data: ImageData) -> torch.Tensor: def create_cxr_transforms_from_config(config: CfgNode, - apply_augmentations: bool) -> ImageTransformationPipeline: + apply_augmentations: bool, + expand_channels: bool = True) -> ImageTransformationPipeline: """ Defines the image transformations pipeline used in Chest-Xray datasets. Can be used for other types of images data, type of augmentations to use and strength are expected to be defined in the config. :param config: config yaml file fixing strength and type of augmentation to apply :param apply_augmentations: if True return transformation pipeline with augmentations. Else, disable augmentations i.e. only resize and center crop the image. + :param expand_channels: if True the expand channel transformation from InnerEye.ML.augmentations.image_transforms + will be added to the transformation passed through the config. This is needed for single channel images as CXR. """ - transforms: List[Any] = [ExpandChannels()] + if expand_channels: + transforms: List[Any] = [ExpandChannels()] + else: + transforms: List[Any] = [] if apply_augmentations: if config.augmentation.use_random_affine: transforms.append(RandomAffine( From c2c5fe71c91c97e95076369f764339bdd41ffada Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 24 Aug 2021 18:40:51 +0100 Subject: [PATCH 03/23] drop_last set as parameter of InnerEyeVisionDataModule --- InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py | 8 +++++--- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py b/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py index 19b4864ba..7d5a97c47 100644 --- a/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py +++ b/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py @@ -29,6 +29,7 @@ def __init__(self, num_workers: int = 6, batch_size: int = 32, seed: int = 42, + drop_last = True, *args: Any, **kwargs: Any) -> None: """ Wrapper around VisionDatamodule to load torchvision dataset into a pytorch-lightning module. @@ -42,16 +43,17 @@ def __init__(self, :param val_transforms: transforms to use at validation time :param data_dir: data directory where to find the data :param val_split: proportion of training dataset to use for validation - :param num_workers: number of processes for dataloaders. - :param batch_size: batch size for training & validation. 
+ :param num_workers: number of processes for dataloaders + :param batch_size: batch size for training & validation :param seed: random seed for dataset splitting + :param drop_last: bool, if true it drops the last incomplete batch """ data_dir = data_dir if data_dir is not None else os.getcwd() super().__init__(data_dir=data_dir, val_split=val_split, num_workers=num_workers, batch_size=batch_size, - drop_last=True, + drop_last=drop_last, train_transforms=train_transforms, val_transforms=val_transforms, seed=seed, diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index daf2fe300..c8640f904 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -210,7 +210,6 @@ def _create_ssl_data_modules(self, is_ssl_encoder_module: bool) -> InnerEyeVisio batch_size=batch_size_per_gpu, num_workers=self.num_workers, seed=self.random_seed) - #import pdb; pdb.set_trace() dm.prepare_data() dm.setup() return dm From 682d2abfbda4122710b3a8f42c147fd0b0bfa6d3 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 24 Aug 2021 18:57:43 +0100 Subject: [PATCH 04/23] drop_last is now a SSLContainer parameter --- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index c8640f904..93937c865 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -96,6 +96,7 @@ class SSLContainer(LightningContainer): learning_rate_linear_head_during_ssl_training = param.Number(default=1e-4, doc="Learning rate for linear head training during " "SSL training.") + drop_last = param.Boolean(default=True, doc="If True drops the last incomplete batch") def setup(self) -> None: from InnerEye.ML.SSL.lightning_containers.ssl_image_classifier import SSLClassifierContainer @@ -209,7 +210,8 @@ def _create_ssl_data_modules(self, is_ssl_encoder_module: bool) -> InnerEyeVisio data_dir=str(datamodule_args.dataset_path), batch_size=batch_size_per_gpu, num_workers=self.num_workers, - seed=self.random_seed) + seed=self.random_seed, + drop_last=self.drop_last) dm.prepare_data() dm.setup() return dm From 68cb45ca7fe2cee0b51831bf6b92db9716665cf0 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 24 Aug 2021 19:20:57 +0100 Subject: [PATCH 05/23] Updating Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35f86f040..89dfeb526 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ jobs that run in AzureML. ensemble) using the parameter `model_id`. ### Changed - ([#531](https://github.com/microsoft/InnerEye-DeepLearning/pull/531)) Updated PL to 1.3.8, torchmetrics and pl-bolts and changed relevant metrics and SSL code API. +- ([#555](https://github.com/microsoft/InnerEye-DeepLearning/pull/555)) Make the SSLContainer compatible with new datasets - ([#533](https://github.com/microsoft/InnerEye-DeepLearning/pull/533)) Better defaults for inference on ensemble children. - ([#536](https://github.com/microsoft/InnerEye-DeepLearning/pull/536)) Inference will not run on the validation set by default, this can be turned on via the `--inference_on_val_set` flag. 
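For reference, the `drop_last` flag threaded through PATCH 03 and PATCH 04 above controls the standard PyTorch dataloader handling of the final incomplete batch. A minimal sketch of that behaviour (plain PyTorch, not InnerEye code), illustrating why the default `drop_last=True` shortens an epoch whenever the dataset size is not a multiple of the batch size:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(10))  # 10 samples, loaded below with batch_size=4

with_drop = DataLoader(dataset, batch_size=4, drop_last=True)
without_drop = DataLoader(dataset, batch_size=4, drop_last=False)

print(len(list(with_drop)))     # 2 batches: the trailing batch of 2 samples is discarded
print(len(list(without_drop)))  # 3 batches: the trailing batch is kept with only 2 samples
```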
From bdf4ca632e83d43b75ceaaa74181d1ddbda53614 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 24 Aug 2021 19:25:49 +0100 Subject: [PATCH 06/23] Fix PEP8 --- InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py | 2 +- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py b/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py index 7d5a97c47..3641a0c98 100644 --- a/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py +++ b/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py @@ -29,7 +29,7 @@ def __init__(self, num_workers: int = 6, batch_size: int = 32, seed: int = 42, - drop_last = True, + drop_last: bool = True, *args: Any, **kwargs: Any) -> None: """ Wrapper around VisionDatamodule to load torchvision dataset into a pytorch-lightning module. diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index 93937c865..a47b381b7 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -244,7 +244,7 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], InnerEyeCIFARTrainTransform(32) if is_ssl_encoder_module else InnerEyeCIFARLinearHeadTransform(32) val_transforms = \ InnerEyeCIFARTrainTransform(32) if is_ssl_encoder_module else InnerEyeCIFARLinearHeadTransform(32) - elif augmentation_config: + elif augmentation_config: train_transforms, val_transforms = get_cxr_ssl_transforms( augmentation_config, return_two_views_per_sample=is_ssl_encoder_module, From fcf27ed9a463b55d0dfc121cb1d7287117044f33 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Wed, 25 Aug 2021 09:08:14 +0100 Subject: [PATCH 07/23] fixing mypy error --- InnerEye/ML/augmentations/transform_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/InnerEye/ML/augmentations/transform_pipeline.py b/InnerEye/ML/augmentations/transform_pipeline.py index 9687e7f07..f38274685 100644 --- a/InnerEye/ML/augmentations/transform_pipeline.py +++ b/InnerEye/ML/augmentations/transform_pipeline.py @@ -98,10 +98,9 @@ def create_cxr_transforms_from_config(config: CfgNode, :param expand_channels: if True the expand channel transformation from InnerEye.ML.augmentations.image_transforms will be added to the transformation passed through the config. This is needed for single channel images as CXR. 
""" + transforms: List[Any] = [] if expand_channels: - transforms: List[Any] = [ExpandChannels()] - else: - transforms: List[Any] = [] + transforms.append(ExpandChannels()) if apply_augmentations: if config.augmentation.use_random_affine: transforms.append(RandomAffine( From bccdb6b44ca83b78d5bc8c415db0c9f431ec94ce Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Wed, 25 Aug 2021 09:17:22 +0100 Subject: [PATCH 08/23] still one fix --- InnerEye/ML/augmentations/transform_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InnerEye/ML/augmentations/transform_pipeline.py b/InnerEye/ML/augmentations/transform_pipeline.py index f38274685..0a3a6eb37 100644 --- a/InnerEye/ML/augmentations/transform_pipeline.py +++ b/InnerEye/ML/augmentations/transform_pipeline.py @@ -100,7 +100,7 @@ def create_cxr_transforms_from_config(config: CfgNode, """ transforms: List[Any] = [] if expand_channels: - transforms.append(ExpandChannels()) + transforms.append(ExpandChannels()) if apply_augmentations: if config.augmentation.use_random_affine: transforms.append(RandomAffine( From 26522c86cccd59898d86fdd707504392b3e2b22e Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Wed, 25 Aug 2021 16:46:02 +0100 Subject: [PATCH 09/23] Updating to main --- .amlignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.amlignore b/.amlignore index d39026640..f8701da5b 100644 --- a/.amlignore +++ b/.amlignore @@ -35,4 +35,8 @@ tensorboard_runs InnerEyeTestVariables.txt InnerEyePrivateSettings.yml cifar-10-batches-py -cifar-100-python \ No newline at end of file +cifar-100-python +Tests +TestsOutsidePackage +TestSubmodule +RegressionTestResults \ No newline at end of file From 68dd10c12a15941b5bc6ee1dae5ef200f3de440e Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Thu, 26 Aug 2021 10:09:01 +0100 Subject: [PATCH 10/23] generalize function names for readibility --- .../transforms_utils.py | 22 +++++++++---------- .../SSL/lightning_containers/ssl_container.py | 11 +++++----- .../ML/augmentations/transform_pipeline.py | 13 ++++++----- .../ML/configs/classification/CovidModel.py | 6 ++--- .../augmentations/test_transform_pipeline.py | 6 ++--- Tests/SSL/test_data_modules.py | 10 ++++----- 6 files changed, 35 insertions(+), 33 deletions(-) diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py b/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py index e22000131..70f8bac9f 100644 --- a/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py +++ b/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py @@ -10,17 +10,17 @@ from pl_bolts.models.self_supervised.simclr import SimCLRTrainDataTransform from yacs.config import CfgNode -from InnerEye.ML.augmentations.transform_pipeline import create_cxr_transforms_from_config +from InnerEye.ML.augmentations.transform_pipeline import create_transforms_from_config -def get_cxr_ssl_transforms(config: CfgNode, - return_two_views_per_sample: bool, - use_training_augmentations_for_validation: bool = False, - expand_channels: bool = True) -> Tuple[Any, Any]: +def get_ssl_transforms_from_config(config: CfgNode, + return_two_views_per_sample: bool, + use_training_augmentations_for_validation: bool = False, + expand_channels: bool = True) -> Tuple[Any, Any]: """ Returns training and validation transforms for CXR. Transformations are constructed in the following way: - 1. Construct the pipeline of augmentations in create_chest_xray_transform (e.g. resize, flip, affine) as defined + 1. 
Construct the pipeline of augmentations in create_transform_from_config (e.g. resize, flip, affine) as defined by the config. 2. If we just want to construct the transformation pipeline for a classification model or for the linear evaluator of the SSL module, return this pipeline. @@ -37,11 +37,11 @@ def get_cxr_ssl_transforms(config: CfgNode, :param expand_channels: if True the expand channel transformation from InnerEye.ML.augmentations.image_transforms will be added to the transformation passed through the config. This is needed for single channel images as CXR. """ - train_transforms = create_cxr_transforms_from_config(config, apply_augmentations=True, - expand_channels=expand_channels) - val_transforms = create_cxr_transforms_from_config(config, - apply_augmentations=use_training_augmentations_for_validation, - expand_channels=expand_channels) + train_transforms = create_transforms_from_config(config, apply_augmentations=True, + expand_channels=expand_channels) + val_transforms = create_transforms_from_config(config, + apply_augmentations=use_training_augmentations_for_validation, + expand_channels=expand_channels) if return_two_views_per_sample: train_transforms = DualViewTransformWrapper(train_transforms) # type: ignore val_transforms = DualViewTransformWrapper(val_transforms) # type: ignore diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index a47b381b7..2682f14bc 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -17,7 +17,7 @@ from InnerEye.ML.SSL.datamodules_and_datasets.datamodules import CombinedDataModule, InnerEyeVisionDataModule from InnerEye.ML.SSL.datamodules_and_datasets.transforms_utils import InnerEyeCIFARLinearHeadTransform, \ InnerEyeCIFARTrainTransform, \ - get_cxr_ssl_transforms + get_ssl_transforms_from_config from InnerEye.ML.SSL.encoders import get_encoder_output_dim from InnerEye.ML.SSL.lightning_modules.byol.byol_module import BYOLInnerEye from InnerEye.ML.SSL.lightning_modules.simclr_module import SimCLRInnerEye @@ -234,7 +234,7 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], SSLDatasetName.CheXpert.value, SSLDatasetName.Covid.value]: assert augmentation_config is not None - train_transforms, val_transforms = get_cxr_ssl_transforms( + train_transforms, val_transforms = get_ssl_transforms_from_config( augmentation_config, return_two_views_per_sample=is_ssl_encoder_module, use_training_augmentations_for_validation=is_ssl_encoder_module @@ -245,16 +245,17 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], val_transforms = \ InnerEyeCIFARTrainTransform(32) if is_ssl_encoder_module else InnerEyeCIFARLinearHeadTransform(32) elif augmentation_config: - train_transforms, val_transforms = get_cxr_ssl_transforms( + train_transforms, val_transforms = get_ssl_transforms_from_config( augmentation_config, return_two_views_per_sample=is_ssl_encoder_module, use_training_augmentations_for_validation=is_ssl_encoder_module, expand_channels=False, ) logging.warning(f"Dataset {dataset_name} unknown. The config will be consumed by " - f"get_cxr_ssl_transforms() to create the augmentation pipeline.") + f"get_ssl_transforms() to create the augmentation pipeline, make sure" + f"the transformations in your configs are compatible. 
") else: - raise ValueError(f"Dataset {dataset_name} unknown and no config has been passed") + raise ValueError(f"Dataset {dataset_name} unknown and no config has been passed.") return train_transforms, val_transforms diff --git a/InnerEye/ML/augmentations/transform_pipeline.py b/InnerEye/ML/augmentations/transform_pipeline.py index 0a3a6eb37..c57aa67a1 100644 --- a/InnerEye/ML/augmentations/transform_pipeline.py +++ b/InnerEye/ML/augmentations/transform_pipeline.py @@ -86,12 +86,13 @@ def __call__(self, data: ImageData) -> torch.Tensor: return self.transform_image(data) -def create_cxr_transforms_from_config(config: CfgNode, - apply_augmentations: bool, - expand_channels: bool = True) -> ImageTransformationPipeline: +def create_transforms_from_config(config: CfgNode, + apply_augmentations: bool, + expand_channels: bool = True) -> ImageTransformationPipeline: """ - Defines the image transformations pipeline used in Chest-Xray datasets. Can be used for other types of - images data, type of augmentations to use and strength are expected to be defined in the config. + Defines the image transformations pipeline from a config file. It has been designed for Chest X-Ray + images but it can be used for other types of images data, type of augmentations to use and strength are + expected to be defined in the config. The channel expansion is needed for gray images. :param config: config yaml file fixing strength and type of augmentation to apply :param apply_augmentations: if True return transformation pipeline with augmentations. Else, disable augmentations i.e. only resize and center crop the image. @@ -100,7 +101,7 @@ def create_cxr_transforms_from_config(config: CfgNode, """ transforms: List[Any] = [] if expand_channels: - transforms.append(ExpandChannels()) + transforms.append(ExpandChannels()) if apply_augmentations: if config.augmentation.use_random_affine: transforms.append(RandomAffine( diff --git a/InnerEye/ML/configs/classification/CovidModel.py b/InnerEye/ML/configs/classification/CovidModel.py index ef64ad634..46b68661e 100644 --- a/InnerEye/ML/configs/classification/CovidModel.py +++ b/InnerEye/ML/configs/classification/CovidModel.py @@ -23,7 +23,7 @@ from InnerEye.ML.SSL.lightning_modules.ssl_classifier_module import SSLClassifier from InnerEye.ML.SSL.utils import create_ssl_encoder, create_ssl_image_classifier, load_yaml_augmentation_config -from InnerEye.ML.augmentations.transform_pipeline import create_cxr_transforms_from_config +from InnerEye.ML.augmentations.transform_pipeline import create_transforms_from_config from InnerEye.ML.common import ModelExecutionMode from InnerEye.ML.configs.ssl.CXR_SSL_configs import path_linear_head_augmentation_cxr @@ -137,9 +137,9 @@ def get_model_train_test_dataset_splits(self, dataset_df: pd.DataFrame) -> Datas def get_image_transform(self) -> ModelTransformsPerExecutionMode: config = load_yaml_augmentation_config(path_linear_head_augmentation_cxr) train_transforms = Compose( - [DicomPreparation(), create_cxr_transforms_from_config(config, apply_augmentations=True)]) + [DicomPreparation(), create_transforms_from_config(config, apply_augmentations=True)]) val_transforms = Compose( - [DicomPreparation(), create_cxr_transforms_from_config(config, apply_augmentations=False)]) + [DicomPreparation(), create_transforms_from_config(config, apply_augmentations=False)]) return ModelTransformsPerExecutionMode(train=train_transforms, val=val_transforms, diff --git a/Tests/ML/augmentations/test_transform_pipeline.py 
b/Tests/ML/augmentations/test_transform_pipeline.py index ba8b6a69c..2048fc2da 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -13,7 +13,7 @@ from InnerEye.ML.augmentations.image_transforms import AddGaussianNoise, ElasticTransform, ExpandChannels, RandomGamma from InnerEye.ML.augmentations.transform_pipeline import ImageTransformationPipeline, \ - create_cxr_transforms_from_config + create_transforms_from_config from Tests.SSL.test_data_modules import cxr_augmentation_config @@ -111,7 +111,7 @@ def test_create_transform_pipeline_from_config() -> None: """ Tests that the pipeline returned by create_transform_pipeline_from_config returns the expected transformation. """ - transformation_pipeline = create_cxr_transforms_from_config(cxr_augmentation_config, apply_augmentations=True) + transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=True) fake_cxr_as_array = np.ones([256, 256]) * 255. fake_cxr_as_array[100:150, 100:200] = 1 fake_cxr_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L") @@ -154,7 +154,7 @@ def test_create_transform_pipeline_from_config() -> None: assert torch.isclose(expected_transformed, transformed_image).all() # Test the evaluation pipeline - transformation_pipeline = create_cxr_transforms_from_config(cxr_augmentation_config, apply_augmentations=False) + transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=False) transformed_image = transformation_pipeline(image) assert isinstance(transformed_image, torch.Tensor) all_transforms = [ExpandChannels(), Resize(size=256), CenterCrop(size=224)] diff --git a/Tests/SSL/test_data_modules.py b/Tests/SSL/test_data_modules.py index 97ab9d8b9..f3c319257 100644 --- a/Tests/SSL/test_data_modules.py +++ b/Tests/SSL/test_data_modules.py @@ -16,7 +16,7 @@ from InnerEye.ML.SSL.datamodules_and_datasets.cxr_datasets import RSNAKaggleCXR from InnerEye.ML.SSL.datamodules_and_datasets.datamodules import CombinedDataModule, InnerEyeVisionDataModule from InnerEye.ML.SSL.datamodules_and_datasets.transforms_utils import InnerEyeCIFARLinearHeadTransform, \ - InnerEyeCIFARTrainTransform, get_cxr_ssl_transforms + InnerEyeCIFARTrainTransform, get_ssl_transforms_from_config from InnerEye.ML.SSL.lightning_containers.ssl_container import SSLContainer, SSLDatasetName from InnerEye.ML.SSL.utils import SSLDataModuleType, load_yaml_augmentation_config from InnerEye.ML.configs.ssl.CXR_SSL_configs import path_encoder_augmentation_cxr @@ -32,8 +32,8 @@ def test_weights_innereye_module() -> None: """ Tests if weights in CXR data module are correctly initialized """ - transforms = get_cxr_ssl_transforms(cxr_augmentation_config, - return_two_views_per_sample=True) + transforms = get_ssl_transforms_from_config(cxr_augmentation_config, + return_two_views_per_sample=True) data_module = InnerEyeVisionDataModule(dataset_cls=RSNAKaggleCXR, return_index=False, train_transforms=transforms[0], @@ -179,8 +179,8 @@ def test_combined_data_module() -> None: """ Tests the behavior of CombinedDataModule """ - _, val_transform = get_cxr_ssl_transforms(cxr_augmentation_config, - return_two_views_per_sample=False) + _, val_transform = get_ssl_transforms_from_config(cxr_augmentation_config, + return_two_views_per_sample=False) # Datamodule expected to have 12 training batches - 3 val long_data_module = InnerEyeVisionDataModule(dataset_cls=RSNAKaggleCXR, From d72a36b562043eb55df3c1c939bedecf722be1a2 Mon Sep 17 
00:00:00 2001 From: vsalvatelli Date: Thu, 26 Aug 2021 10:30:04 +0100 Subject: [PATCH 11/23] Updating documentation --- docs/self_supervised_models.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/self_supervised_models.md b/docs/self_supervised_models.md index 159814e41..002010de2 100644 --- a/docs/self_supervised_models.md +++ b/docs/self_supervised_models.md @@ -125,12 +125,15 @@ To use this code with your own data, you will need to: and `InnerEyeDataClassBaseWithReturnIndex`. See for example how we constructed `RSNAKaggleCXR` class. WARNING: the first positional argument of your dataset class constructor MUST be the data directory ("root"), as VisionDataModule expects this in the prepare_data step. -2. Add a member to the `SSLDatasetName` Enum with your new dataset and update the `_SSLDataClassMappings` member of the - class so that the code knows which data class to associate to your new dataset name. -3. Update the `_get_transforms` methods to add the transform specific to your new dataset. To simplify this step, we - have defined a series of standard transforms parametrized by an augmentation yaml file in `SSL/transforms_utils.py` ( - see next paragraph for more details). You could for example construct a transform pipeline similar to the one created - with `get_cxr_ssl_transforms` for our CXR examples. +2. Define your own Lightening Container that inherits from `SSLContainer` as described in the paragraph above. +3. In your own container update the `_SSLDataClassMappings` member of the class so that the code knows which data class + to associate to your new dataset name. +3. Create a yaml configuration file that contains the augmentations specific to your dataset. The yaml file will be + consumed by the `create_transforms_from_config` function defined in the + `InnerEye.ML.augmentations.transform_pipeline` module (see next paragraph for more details). Alternatively, overwrite + the `_get_transforms` method. To simplify this step, we have defined a series of standard operations in + `SSL/transforms_utils.py` . You could for example construct a transform pipeline similar to the one created + inside `create_transform_from_config` inside your own method. 4. Update all necessary parameters in the model config (cf. previous paragraph) Once all these steps are updated, the code in the base SSLContainer class will take care of creating the corresponding From daeaab1bddfb27e194d299b120e0e46c2ea0e9d8 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Thu, 26 Aug 2021 11:45:54 +0100 Subject: [PATCH 12/23] Updating documentation --- docs/self_supervised_models.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/self_supervised_models.md b/docs/self_supervised_models.md index 002010de2..c93afee2b 100644 --- a/docs/self_supervised_models.md +++ b/docs/self_supervised_models.md @@ -117,24 +117,29 @@ with the following available arguments: * `random_seed`: seed for the run, * `num_epochs`: number of epochs to train for. +In case you wish to first test your model locally, here some optional arguments that can be useful: +* `local_dataset`: path to local dataset, if passed the azure dataset will be ignored +* `is_debug_model`: if True it will only run on the first batch of each epoch +* `drop_last`: if False (True by default) it will keep the last batch also if incomplete + ### Creating your own datamodules: To use this code with your own data, you will need to: -1. 
Create a dataset class that reads your new dataset, inheriting from both `VisionDataset` +1. Define your own Lightening Container that inherits from `SSLContainer` as described in the paragraph above. +2. Create a dataset class that reads your new dataset, inheriting from both `VisionDataset` and `InnerEyeDataClassBaseWithReturnIndex`. See for example how we constructed `RSNAKaggleCXR` class. WARNING: the first positional argument of your dataset class constructor MUST be the data directory ("root"), as VisionDataModule expects this in the prepare_data step. -2. Define your own Lightening Container that inherits from `SSLContainer` as described in the paragraph above. 3. In your own container update the `_SSLDataClassMappings` member of the class so that the code knows which data class to associate to your new dataset name. -3. Create a yaml configuration file that contains the augmentations specific to your dataset. The yaml file will be +4. Create a yaml configuration file that contains the augmentations specific to your dataset. The yaml file will be consumed by the `create_transforms_from_config` function defined in the `InnerEye.ML.augmentations.transform_pipeline` module (see next paragraph for more details). Alternatively, overwrite the `_get_transforms` method. To simplify this step, we have defined a series of standard operations in `SSL/transforms_utils.py` . You could for example construct a transform pipeline similar to the one created inside `create_transform_from_config` inside your own method. -4. Update all necessary parameters in the model config (cf. previous paragraph) +5. Update all necessary parameters in the model config (cf. previous paragraph) Once all these steps are updated, the code in the base SSLContainer class will take care of creating the corresponding datamodules for SSL training and linear head monitoring. 
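A rough sketch of steps 1-3 described in the documentation change above (class names, the dataset-name key and the import location of `InnerEyeDataClassBaseWithReturnIndex` are illustrative assumptions; `RSNAKaggleCXR` in `cxr_datasets.py` remains the reference implementation the docs point to):

```python
from typing import Any, Callable, Optional

from torchvision.datasets import VisionDataset

from InnerEye.ML.SSL.lightning_containers.ssl_container import SSLContainer
# Assumed import location for the base class named in the docs above; adjust if it lives elsewhere.
from InnerEye.ML.SSL.datamodules_and_datasets.dataset_cls_utils import InnerEyeDataClassBaseWithReturnIndex


class MyNewDataset(InnerEyeDataClassBaseWithReturnIndex, VisionDataset):
    """Step 2: a dataset whose FIRST positional argument is the data directory ("root")."""

    def __init__(self, root: str, train: bool = True,
                 transform: Optional[Callable] = None, **kwargs: Any) -> None:
        super().__init__(root=root, transform=transform, **kwargs)
        self.train = train
        # ... read the file list and labels found under `root` here ...


class MyNewSSLContainer(SSLContainer):
    """Steps 1 and 3: a container that tells the SSL code which class backs the new dataset name."""
    # The key must match the dataset name the container is configured with.
    _SSLDataClassMappings = {**SSLContainer._SSLDataClassMappings, "MYNEWDATASET": MyNewDataset}
```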
From 6509e4f3a18c6ad02088d478d7f4d69032ad96b3 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Thu, 26 Aug 2021 11:52:18 +0100 Subject: [PATCH 13/23] removing unexpected changes in amlignore --- .amlignore | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.amlignore b/.amlignore index f8701da5b..d39026640 100644 --- a/.amlignore +++ b/.amlignore @@ -35,8 +35,4 @@ tensorboard_runs InnerEyeTestVariables.txt InnerEyePrivateSettings.yml cifar-10-batches-py -cifar-100-python -Tests -TestsOutsidePackage -TestSubmodule -RegressionTestResults \ No newline at end of file +cifar-100-python \ No newline at end of file From bc5a81c1917824cb245e407cc39b2b232287e542 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Thu, 26 Aug 2021 12:48:42 +0100 Subject: [PATCH 14/23] Adding test --- Tests/ML/augmentations/test_transform_pipeline.py | 11 +++++++---- Tests/SSL/test_data_modules.py | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index 2048fc2da..bc40e2050 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -107,17 +107,18 @@ def test_custom_tf_on_various_input(use_different_transformation_per_channel: bo assert torch.isclose(transformed[0, 0], transformed[1, 1]).all() != use_different_transformation_per_channel -def test_create_transform_pipeline_from_config() -> None: +@pytest.mark.parametrize("expand_channels", [True, False]) +def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: """ Tests that the pipeline returned by create_transform_pipeline_from_config returns the expected transformation. """ - transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=True) + transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=True, + expand_channels=expand_channels) fake_cxr_as_array = np.ones([256, 256]) * 255. 
fake_cxr_as_array[100:150, 100:200] = 1 fake_cxr_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L") - all_transforms = [ExpandChannels(), - RandomAffine(degrees=180, translate=(0, 0), shear=40), + all_transforms = [RandomAffine(degrees=180, translate=(0, 0), shear=40), RandomResizedCrop(scale=(0.4, 1.0), size=256), RandomHorizontalFlip(p=0.5), RandomGamma(scale=(0.5, 1.5)), @@ -127,6 +128,8 @@ def test_create_transform_pipeline_from_config() -> None: RandomErasing(scale=(0.15, 0.4), ratio=(0.33, 3)), AddGaussianNoise(std=0.05, p_apply=0.5) ] + if expand_channels: + all_transforms.insert(0, ExpandChannels()) np.random.seed(3) torch.manual_seed(3) diff --git a/Tests/SSL/test_data_modules.py b/Tests/SSL/test_data_modules.py index f3c319257..97b745a71 100644 --- a/Tests/SSL/test_data_modules.py +++ b/Tests/SSL/test_data_modules.py @@ -70,7 +70,8 @@ def test_innereye_vision_module() -> None: data_dir=None, batch_size=5, shuffle=False, - num_workers=0) + num_workers=0, + drop_last=True) data_module.prepare_data() data_module.setup() assert len(data_module.dataset_train) == 45000 From fc22df9928da5de2a61229032a2b602056e6df0f Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Thu, 26 Aug 2021 15:40:52 +0100 Subject: [PATCH 15/23] Adding bits to the test --- .../augmentations/test_transform_pipeline.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index bc40e2050..56ae24130 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -116,7 +116,6 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: expand_channels=expand_channels) fake_cxr_as_array = np.ones([256, 256]) * 255. fake_cxr_as_array[100:150, 100:200] = 1 - fake_cxr_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L") all_transforms = [RandomAffine(degrees=180, translate=(0, 0), shear=40), RandomResizedCrop(scale=(0.4, 1.0), size=256), @@ -129,7 +128,12 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: AddGaussianNoise(std=0.05, p_apply=0.5) ] if expand_channels: + # expand channels is used for single-channel input images + fake_cxr_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L") all_transforms.insert(0, ExpandChannels()) + else: + fake_3d_array = np.stack([fake_cxr_as_array for i in range(3)]) + fake_cxr_image = PIL.Image.fromarray(fake_3d_array).convert("RGB") np.random.seed(3) torch.manual_seed(3) @@ -137,12 +141,10 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: transformed_image = transformation_pipeline(fake_cxr_image) assert isinstance(transformed_image, torch.Tensor) + # Expected pipeline - image = np.ones([256, 256]) * 255. - image[100:150, 100:200] = 1 - image = PIL.Image.fromarray(image).convert("L") # In the pipeline the image is converted to tensor before applying the transformations. Do the same here. 
- image = ToTensor()(image).reshape([1, 1, 256, 256]) + image = ToTensor()(fake_cxr_image).reshape([1, 1, 256, 256]) np.random.seed(3) torch.manual_seed(3) @@ -157,10 +159,14 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: assert torch.isclose(expected_transformed, transformed_image).all() # Test the evaluation pipeline - transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=False) + transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=False, + expand_channels=expand_channels) transformed_image = transformation_pipeline(image) assert isinstance(transformed_image, torch.Tensor) - all_transforms = [ExpandChannels(), Resize(size=256), CenterCrop(size=224)] + all_transforms = [Resize(size=256), CenterCrop(size=224)] + if expand_channels: + all_transforms.insert(0, ExpandChannels()) + expected_transformed = image for t in all_transforms: expected_transformed = t(expected_transformed) From d74eaf47b5b72c7f0268dd6241b6994116454f66 Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Wed, 1 Sep 2021 09:53:03 +0100 Subject: [PATCH 16/23] committing to switch branch, test_transform pipeline still to be fixed --- .../ML/SSL/datamodules_and_datasets/transforms_utils.py | 2 +- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 8 ++++---- Tests/ML/augmentations/test_transform_pipeline.py | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py b/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py index 70f8bac9f..89a197b90 100644 --- a/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py +++ b/InnerEye/ML/SSL/datamodules_and_datasets/transforms_utils.py @@ -30,7 +30,7 @@ def get_ssl_transforms_from_config(config: CfgNode, :param config: configuration defining which augmentations to apply as well as their intensities. :param return_two_views_per_sample: if True the resulting transforms will return two versions of each sample they - are called on. If False, simply return one transformed version of the sample. + are called on. If False, simply return one transformed version of the sample centered and cropped. :param use_training_augmentations_for_validation: If True, use augmentation at validation time too. This is required for SSL validation loss to be meaningful. If False, only apply basic processing step (no augmentations) diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index 2682f14bc..2940da1d1 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -187,7 +187,7 @@ def _create_ssl_data_modules(self, is_ssl_encoder_module: bool) -> InnerEyeVisio """ Returns torch lightning data module for encoder or linear head - :param is_ssl_encoder_module: whether to return the data module for SSL training or for linear heard. If true, + :param is_ssl_encoder_module: whether to return the data module for SSL training or for linear head. If true, :return transforms with two views per sample (batch like (img_v1, img_v2, label)). 
If False, return only one view per sample but also return the index of the sample in the dataset (to make sure we don't use twice the same batch in one training epoch (batch like (index, img_v1, label), as classifier dataloader expected to be shorter @@ -225,8 +225,8 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], examples. :param dataset_name: name of the dataset, value has to be in SSLDatasetName, determines which transformation pipeline to return. - :param is_ssl_encoder_module: if True the transformation pipeline will yield two version of the image it is - applied on. If False, return only one transformation. + :param is_ssl_encoder_module: if True the transformation pipeline will yield two versions of the image it is + applied on and it applies the same transformations for validation. If False, return only one transformation. :return: training transformation pipeline and validation transformation pipeline. """ if dataset_name in [SSLDatasetName.RSNAKaggleCXR.value, @@ -252,7 +252,7 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], expand_channels=False, ) logging.warning(f"Dataset {dataset_name} unknown. The config will be consumed by " - f"get_ssl_transforms() to create the augmentation pipeline, make sure" + f"get_ssl_transforms() to create the augmentation pipeline, make sure " f"the transformations in your configs are compatible. ") else: raise ValueError(f"Dataset {dataset_name} unknown and no config has been passed.") diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index 56ae24130..fce8b5cd7 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -133,6 +133,7 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: all_transforms.insert(0, ExpandChannels()) else: fake_3d_array = np.stack([fake_cxr_as_array for i in range(3)]) + # TODO this is raising an error - understands what shapes/values you need in here fake_cxr_image = PIL.Image.fromarray(fake_3d_array).convert("RGB") np.random.seed(3) @@ -159,6 +160,7 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: assert torch.isclose(expected_transformed, transformed_image).all() # Test the evaluation pipeline + # TODO why this is not parametrized? 
transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=False, expand_channels=expand_channels) transformed_image = transformation_pipeline(image) From 0cc78932d905c9a055236f15196dc2ee2110eb8c Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 14 Sep 2021 12:09:48 +0000 Subject: [PATCH 17/23] fixing test --- .../augmentations/test_transform_pipeline.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index fce8b5cd7..639adbf11 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -127,26 +127,26 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: RandomErasing(scale=(0.15, 0.4), ratio=(0.33, 3)), AddGaussianNoise(std=0.05, p_apply=0.5) ] + if expand_channels: - # expand channels is used for single-channel input images - fake_cxr_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L") all_transforms.insert(0, ExpandChannels()) + # expand channels is used for single-channel input images + fake_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L") + # In the pipeline the image is converted to tensor before applying the transformations. Do the same here. + image = ToTensor()(fake_image).reshape([1, 1, 256, 256]) else: - fake_3d_array = np.stack([fake_cxr_as_array for i in range(3)]) - # TODO this is raising an error - understands what shapes/values you need in here - fake_cxr_image = PIL.Image.fromarray(fake_3d_array).convert("RGB") - + fake_3d_array = np.dstack([fake_cxr_as_array, fake_cxr_as_array, fake_cxr_as_array]) + fake_image = PIL.Image.fromarray(fake_3d_array.astype(np.uint8)).convert("RGB") + # In the pipeline the image is converted to tensor before applying the transformations. Do the same here. + image = ToTensor()(fake_image).reshape([1, 3, 256, 256]) + np.random.seed(3) torch.manual_seed(3) random.seed(3) - - transformed_image = transformation_pipeline(fake_cxr_image) + transformed_image = transformation_pipeline(fake_image) assert isinstance(transformed_image, torch.Tensor) # Expected pipeline - # In the pipeline the image is converted to tensor before applying the transformations. Do the same here. - image = ToTensor()(fake_cxr_image).reshape([1, 1, 256, 256]) - np.random.seed(3) torch.manual_seed(3) random.seed(3) From c474713575c32dcd7ebda2f66ade3ce2a2b0251e Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 14 Sep 2021 12:11:32 +0000 Subject: [PATCH 18/23] remove TODO --- Tests/ML/augmentations/test_transform_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index 639adbf11..c0ffdacf6 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -160,7 +160,6 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: assert torch.isclose(expected_transformed, transformed_image).all() # Test the evaluation pipeline - # TODO why this is not parametrized? 
transformation_pipeline = create_transforms_from_config(cxr_augmentation_config, apply_augmentations=False, expand_channels=expand_channels) transformed_image = transformation_pipeline(image) From 9cda0745b462cfe72004261877509d7958dba48c Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 14 Sep 2021 14:46:31 +0000 Subject: [PATCH 19/23] fixing flake8 --- InnerEye/ML/configs/classification/CovidModel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InnerEye/ML/configs/classification/CovidModel.py b/InnerEye/ML/configs/classification/CovidModel.py index da5d07dba..88a102e2b 100644 --- a/InnerEye/ML/configs/classification/CovidModel.py +++ b/InnerEye/ML/configs/classification/CovidModel.py @@ -22,7 +22,7 @@ from InnerEye.ML.SSL.encoders import SSLEncoder from InnerEye.ML.SSL.lightning_containers.ssl_container import EncoderName from InnerEye.ML.SSL.lightning_modules.ssl_classifier_module import SSLClassifier -from InnerEye.ML.SSL.utils import create_ssl_encoder, create_ssl_image_classifier, load_yaml_augmentation_config +from InnerEye.ML.SSL.utils import create_ssl_image_classifier, load_yaml_augmentation_config from InnerEye.ML.augmentations.transform_pipeline import create_transforms_from_config from InnerEye.ML.common import ModelExecutionMode From 7fa0dbd1be49c0da1d9fb3a3bdcbf5a1d2501d6c Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 14 Sep 2021 15:17:18 +0000 Subject: [PATCH 20/23] fixing flake8 for real --- Tests/ML/augmentations/test_transform_pipeline.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index c0ffdacf6..23d54a0ec 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -7,11 +7,12 @@ import PIL import pytest import torch -from torchvision.transforms import CenterCrop, ColorJitter, RandomAffine, RandomErasing, RandomHorizontalFlip, \ - RandomResizedCrop, Resize, ToTensor +from torchvision.transforms import (CenterCrop, ColorJitter, RandomAffine, RandomErasing, RandomHorizontalFlip, +RandomResizedCrop, Resize, ToTensor) from torchvision.transforms.functional import to_tensor -from InnerEye.ML.augmentations.image_transforms import AddGaussianNoise, ElasticTransform, ExpandChannels, RandomGamma +from InnerEye.ML.augmentations.image_transforms import (AddGaussianNoise, ElasticTransform, +ExpandChannels, RandomGamma) from InnerEye.ML.augmentations.transform_pipeline import ImageTransformationPipeline, \ create_transforms_from_config @@ -31,7 +32,6 @@ test_4d_scan_as_tensor = torch.ones([5, 4, *image_size]) * 255. test_4d_scan_as_tensor[..., 10:15, 10:20] = 1 - @pytest.mark.parametrize("use_different_transformation_per_channel", [True, False]) def test_torchvision_on_various_input(use_different_transformation_per_channel: bool) -> None: """ @@ -116,7 +116,6 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: expand_channels=expand_channels) fake_cxr_as_array = np.ones([256, 256]) * 255. 
fake_cxr_as_array[100:150, 100:200] = 1 - all_transforms = [RandomAffine(degrees=180, translate=(0, 0), shear=40), RandomResizedCrop(scale=(0.4, 1.0), size=256), RandomHorizontalFlip(p=0.5), @@ -139,7 +138,7 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: fake_image = PIL.Image.fromarray(fake_3d_array.astype(np.uint8)).convert("RGB") # In the pipeline the image is converted to tensor before applying the transformations. Do the same here. image = ToTensor()(fake_image).reshape([1, 3, 256, 256]) - + np.random.seed(3) torch.manual_seed(3) random.seed(3) @@ -150,7 +149,6 @@ def test_create_transform_pipeline_from_config(expand_channels: bool) -> None: np.random.seed(3) torch.manual_seed(3) random.seed(3) - expected_transformed = image for t in all_transforms: expected_transformed = t(expected_transformed) From 1b978dd063cd8d2ee753112d406ede536d9d1dbf Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Tue, 14 Sep 2021 15:31:25 +0000 Subject: [PATCH 21/23] fixing more flake8 --- Tests/ML/augmentations/test_transform_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/ML/augmentations/test_transform_pipeline.py b/Tests/ML/augmentations/test_transform_pipeline.py index 23d54a0ec..d6f2d4092 100644 --- a/Tests/ML/augmentations/test_transform_pipeline.py +++ b/Tests/ML/augmentations/test_transform_pipeline.py @@ -8,11 +8,11 @@ import pytest import torch from torchvision.transforms import (CenterCrop, ColorJitter, RandomAffine, RandomErasing, RandomHorizontalFlip, -RandomResizedCrop, Resize, ToTensor) + RandomResizedCrop, Resize, ToTensor) from torchvision.transforms.functional import to_tensor from InnerEye.ML.augmentations.image_transforms import (AddGaussianNoise, ElasticTransform, -ExpandChannels, RandomGamma) + ExpandChannels, RandomGamma) from InnerEye.ML.augmentations.transform_pipeline import ImageTransformationPipeline, \ create_transforms_from_config From 7af305cda4cdca6978f28412c535f933e9a22e0c Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Wed, 15 Sep 2021 08:52:40 +0000 Subject: [PATCH 22/23] docstring changed --- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index 08733a96e..a491db297 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -226,7 +226,9 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], :param dataset_name: name of the dataset, value has to be in SSLDatasetName, determines which transformation pipeline to return. :param is_ssl_encoder_module: if True the transformation pipeline will yield two versions of the image it is - applied on and it applies the same transformations for validation. If False, return only one transformation. + applied on and it applies the same transformations for training and validation. Note that if your transformation + does not contain any randomness, the pipeline will return two identical copies. If False, it will return only one + transformation. :return: training transformation pipeline and validation transformation pipeline. 
""" if dataset_name in [SSLDatasetName.RSNAKaggleCXR.value, From 0c255a577b64ab91653a9ab0463f067310845dcd Mon Sep 17 00:00:00 2001 From: vsalvatelli Date: Wed, 15 Sep 2021 09:02:27 +0000 Subject: [PATCH 23/23] docstring changed, thanks Mel --- InnerEye/ML/SSL/lightning_containers/ssl_container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py index a491db297..d3f934042 100644 --- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py +++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py @@ -226,7 +226,7 @@ def _get_transforms(self, augmentation_config: Optional[CfgNode], :param dataset_name: name of the dataset, value has to be in SSLDatasetName, determines which transformation pipeline to return. :param is_ssl_encoder_module: if True the transformation pipeline will yield two versions of the image it is - applied on and it applies the same transformations for training and validation. Note that if your transformation + applied on and it applies the training transformations also at validation time. Note that if your transformation does not contain any randomness, the pipeline will return two identical copies. If False, it will return only one transformation. :return: training transformation pipeline and validation transformation pipeline.