diff --git a/.github/workflows/check_changelog.yml b/.github/workflows/check_changelog.yml
index 7cced82dc..0ca7fb52e 100644
--- a/.github/workflows/check_changelog.yml
+++ b/.github/workflows/check_changelog.yml
@@ -6,7 +6,8 @@ name: Check Changelog
 on:
   pull_request:
 jobs:
-  check:
+  check_changelog:
+    name: Check Changelog
     runs-on: ubuntu-latest
     if: ${{ contains(github.event.pull_request.labels.*.name, 'no changelog needed') == 0 }}
     steps:
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 5a81e8493..592674cb2 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -10,8 +10,8 @@ on:
     - cron: '45 4 * * 1'
 
 jobs:
-  analyze:
-    name: Analyze
+  codeql_analyze:
+    name: CodeQL Analyze
     runs-on: ubuntu-latest
 
     strategy:
diff --git a/.github/workflows/issues_to_ado.yml b/.github/workflows/issues_to_ado.yml
index ec24f968f..e7a2728f0 100644
--- a/.github/workflows/issues_to_ado.yml
+++ b/.github/workflows/issues_to_ado.yml
@@ -6,7 +6,8 @@ on:
       [opened, edited, deleted, closed, reopened, labeled, unlabeled, assigned]
 
 jobs:
-  alert:
+  issues_to_ado:
+    name: Sync issues with Azure DevOps
     runs-on: ubuntu-latest
     steps:
       - uses: danhellem/github-actions-issue-to-work-item@master
diff --git a/.github/workflows/linting_and_hello_world.yml b/.github/workflows/linting_and_hello_world.yml
index 9ed6405bd..b1a5c3410 100644
--- a/.github/workflows/linting_and_hello_world.yml
+++ b/.github/workflows/linting_and_hello_world.yml
@@ -7,7 +7,8 @@ on:
   pull_request:
 
 jobs:
-  linux:
+  flake_mypy_helloworld_linux:
+    name: Flake8, MyPy, HelloWorld on Linux
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
@@ -53,7 +54,8 @@ jobs:
           PYTHONPATH: ${{ github.workspace }}
         if: always()
 
-  windows:
+  hello_world_windows:
+    name: HelloWorld on Windows
     runs-on: windows-latest
     steps:
       - uses: actions/checkout@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b8a7b7a71..c18a62698 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -119,6 +119,7 @@ in inference-only runs when using lightning containers.
 - ([#628](https://github.com/microsoft/InnerEye-DeepLearning/pull/628)) SSL SimCLR using the wrong LR schedule when running on multiple nodes
 - ([#638](https://github.com/microsoft/InnerEye-DeepLearning/pull/638)) SimClr cosine LR scheduler was using wrong length information when using with long linear head datasets
 - ([#612](https://github.com/microsoft/InnerEye-DeepLearning/pull/612)) SSL online evaluator was not doing distributed training
+- ([#652](https://github.com/microsoft/InnerEye-DeepLearning/pull/652)) Run pytest build on Windows after Linux agent version upgrade
 
 ### Removed
 
diff --git a/InnerEye/ML/Histopathology/datasets/panda_dataset.py b/InnerEye/ML/Histopathology/datasets/panda_dataset.py
index b84571257..baf2379b9 100644
--- a/InnerEye/ML/Histopathology/datasets/panda_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/panda_dataset.py
@@ -2,12 +2,11 @@
 #  Copyright (c) Microsoft Corporation. All rights reserved.
 #  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 #  ------------------------------------------------------------------------------------------
-
+import logging
 from pathlib import Path
 from typing import Any, Dict, Union, Optional
 
 import pandas as pd
-from cucim import CuImage
 from health_ml.utils import box_utils
 from monai.config import KeysCollection
 from monai.data.image_reader import ImageReader, WSIReader
@@ -15,6 +14,11 @@
 
 from InnerEye.ML.Histopathology.datasets.base_dataset import SlidesDataset
 
+try:
+    from cucim import CuImage
+except ImportError:  # only a missing/unimportable cucim is tolerated; other errors must surface
+    logging.warning("cucim library not available, code may fail.")
+
 
 class PandaDataset(SlidesDataset):
     """Dataset class for loading files from the PANDA challenge dataset.
@@ -48,6 +52,7 @@ def __init__(self,
 # MONAI's convention is that dictionary transforms have a 'd' suffix in the class name
 class ReadImaged(MapTransform):
     """Basic transform to read image files."""
+
     def __init__(self, reader: ImageReader, keys: KeysCollection,
                  allow_missing_keys: bool = False, **kwargs: Any) -> None:
         super().__init__(keys, allow_missing_keys=allow_missing_keys)
@@ -71,6 +76,7 @@ class LoadPandaROId(MapTransform):
     - `'level'` (int): chosen magnification level
     - `'scale'` (float): corresponding scale, loaded from the file
     """
+
     def __init__(self, reader: WSIReader, image_key: str = 'image', mask_key: str = 'mask',
                  level: int = 0, margin: int = 0, **kwargs: Any) -> None:
         """
@@ -88,7 +94,7 @@ def __init__(self, reader: WSIReader, image_key: str = 'image', mask_key: str =
         self.margin = margin
         self.kwargs = kwargs
 
-    def _get_bounding_box(self, mask_obj: CuImage) -> box_utils.Box:
+    def _get_bounding_box(self, mask_obj: 'CuImage') -> box_utils.Box:
         # Estimate bounding box at the lowest resolution (i.e. highest level)
         highest_level = mask_obj.resolutions['level_count'] - 1
         scale = mask_obj.resolutions['level_downsamples'][highest_level]
diff --git a/InnerEye/ML/Histopathology/preprocessing/loading.py b/InnerEye/ML/Histopathology/preprocessing/loading.py
index 77942e555..03de14165 100644
--- a/InnerEye/ML/Histopathology/preprocessing/loading.py
+++ b/InnerEye/ML/Histopathology/preprocessing/loading.py
@@ -1,14 +1,19 @@
+import logging
 from typing import Dict, Optional, Tuple
 
 import numpy as np
 import skimage.filters
-from cucim import CuImage
 from health_ml.utils import box_utils
 from monai.data.image_reader import WSIReader
 from monai.transforms import MapTransform
 
 from InnerEye.ML.Histopathology.utils.naming import SlideKey
 
+try:
+    from cucim import CuImage
+except ImportError:  # only a missing/unimportable cucim is tolerated; other errors must surface
+    logging.warning("cucim library not available, code may fail.")
+
 
 def get_luminance(slide: np.ndarray) -> np.ndarray:
     """Compute a grayscale version of the input slide.
@@ -35,7 +40,7 @@ def segment_foreground(slide: np.ndarray, threshold: Optional[float] = None) \
     return luminance < threshold, threshold
 
 
-def load_slide_at_level(reader: WSIReader, slide_obj: CuImage, level: int) -> np.ndarray:
+def load_slide_at_level(reader: WSIReader, slide_obj: 'CuImage', level: int) -> np.ndarray:
     """Load full slide array at the given magnification level.
 
     This is a manual workaround for a MONAI bug (https://github.com/Project-MONAI/MONAI/issues/3415)
@@ -60,6 +65,7 @@ class LoadROId(MapTransform):
     - `SlideKey.SCALE` (float): corresponding scale, loaded from the file
     - `SlideKey.FOREGROUND_THRESHOLD` (float): threshold used to segment the foreground
     """
+
     def __init__(self, reader: WSIReader, image_key: str = SlideKey.IMAGE, level: int = 0,
                  margin: int = 0, foreground_threshold: Optional[float] = None) -> None:
         """
@@ -77,7 +83,7 @@ def __init__(self, reader: WSIReader, image_key: str = SlideKey.IMAGE, level: in
         self.margin = margin
         self.foreground_threshold = foreground_threshold
 
-    def _get_bounding_box(self, slide_obj: CuImage) -> Tuple[box_utils.Box, float]:
+    def _get_bounding_box(self, slide_obj: 'CuImage') -> Tuple[box_utils.Box, float]:
         # Estimate bounding box at the lowest resolution (i.e. highest level)
         highest_level = slide_obj.resolutions['level_count'] - 1
         scale = slide_obj.resolutions['level_downsamples'][highest_level]
@@ -88,6 +94,7 @@ def _get_bounding_box(self, slide_obj: CuImage) -> Tuple[box_utils.Box, float]:
         return bbox, threshold
 
     def __call__(self, data: Dict) -> Dict:
+        from cucim import CuImage
         image_obj: CuImage = self.reader.read(data[self.image_key])
 
         level0_bbox, threshold = self._get_bounding_box(image_obj)
diff --git a/Tests/Azure/test_azure_config.py b/Tests/Azure/test_azure_config.py
index b889779f7..6a375eb30 100644
--- a/Tests/Azure/test_azure_config.py
+++ b/Tests/Azure/test_azure_config.py
@@ -9,6 +9,7 @@
 
 from InnerEye.Azure.azure_config import AzureConfig
 from InnerEye.Azure.azure_runner import create_dataset_configs
+from InnerEye.Common.common_util import is_linux
 from InnerEye.ML.deep_learning_config import DatasetParams
 from Tests.ML.util import get_default_azure_config
 
@@ -65,8 +66,10 @@ def test_dataset_consumption2() -> None:
     assert datasets[1].name == "2"
     assert datasets[0].local_folder == Path("l1")
     assert datasets[1].local_folder == Path("l2")
-    assert datasets[0].target_folder == PosixPath("mp1")
-    assert datasets[1].target_folder == PosixPath("mp2")
+    if is_linux():
+        # PosixPath cannot be instantiated on Windows
+        assert datasets[0].target_folder == PosixPath("mp1")
+        assert datasets[1].target_folder == PosixPath("mp2")
 
 
 def test_dataset_consumption3() -> None:
diff --git a/Tests/ML/histopathology/models/test_deepmil.py b/Tests/ML/histopathology/models/test_deepmil.py
index 797b3080c..96ccd3fc8 100644
--- a/Tests/ML/histopathology/models/test_deepmil.py
+++ b/Tests/ML/histopathology/models/test_deepmil.py
@@ -7,6 +7,7 @@
 from typing import Callable, Dict, List, Type  # noqa
 
 import pytest
+import torch
 from torch import Tensor, argmax, nn, rand, randint, randn, round, stack, allclose
 from torchvision.models import resnet18
 
@@ -29,7 +30,7 @@
 )
 from InnerEye.ML.Histopathology.models.deepmil import DeepMILModule
 from InnerEye.ML.Histopathology.models.encoders import ImageNetEncoder, TileEncoder
-from InnerEye.ML.Histopathology.utils.naming import ResultsKey
+from InnerEye.ML.Histopathology.utils.naming import MetricsKey, ResultsKey
 
 
 def get_supervised_imagenet_encoder() -> TileEncoder:
@@ -38,10 +39,10 @@ def get_supervised_imagenet_encoder() -> TileEncoder:
 
 @pytest.mark.parametrize("n_classes", [1, 3])
 @pytest.mark.parametrize("pooling_layer", [AttentionLayer, GatedAttentionLayer])
-@pytest.mark.parametrize("batch_size", [1, 15])
-@pytest.mark.parametrize("max_bag_size", [1, 7])
-@pytest.mark.parametrize("pool_hidden_dim", [1, 5])
-@pytest.mark.parametrize("pool_out_dim", [1, 6])
+@pytest.mark.parametrize("batch_size", [1, 2])
+@pytest.mark.parametrize("max_bag_size", [1, 3])
+@pytest.mark.parametrize("pool_hidden_dim", [1, 4])
+@pytest.mark.parametrize("pool_out_dim", [1, 5])
 def test_lightningmodule(
     n_classes: int,
     pooling_layer: Callable[[int, int, int], nn.Module],
@@ -108,9 +109,12 @@ def test_lightningmodule(
     assert preds.shape[0] == batch_size
 
     for metric_name, metric_object in module.train_metrics.items():
-        if (batch_size > 1) or (not metric_name == "auroc"):
+        if metric_name == MetricsKey.CONF_MATRIX or metric_name == MetricsKey.AUROC:
+            continue
+        if batch_size > 1:
             score = metric_object(preds.view(-1, 1), bag_labels.view(-1, 1))
-            assert score >= 0 and score <= 1
+            assert torch.all(score >= 0)
+            assert torch.all(score <= 1)
 
 
 def move_batch_to_expected_device(batch: Dict[str, List], use_gpu: bool) -> Dict:
diff --git a/Tests/ML/histopathology/preprocessing/test_slide_loading.py b/Tests/ML/histopathology/preprocessing/test_slide_loading.py
index 60d9717ed..a090eecd0 100644
--- a/Tests/ML/histopathology/preprocessing/test_slide_loading.py
+++ b/Tests/ML/histopathology/preprocessing/test_slide_loading.py
@@ -2,21 +2,24 @@
 
 import numpy as np
 import pytest
-from cucim import CuImage
 from monai.data.image_reader import WSIReader
 
+from InnerEye.Common.common_util import is_windows
 from InnerEye.Common.fixed_paths_for_tests import tests_root_directory
 from InnerEye.ML.Histopathology.preprocessing.tiling import tile_array_2d
-from InnerEye.ML.Histopathology.preprocessing.loading import LoadROId, get_luminance, load_slide_at_level, segment_foreground
+from InnerEye.ML.Histopathology.preprocessing.loading import (LoadROId, get_luminance, load_slide_at_level,
+                                                              segment_foreground)
 from InnerEye.ML.Histopathology.utils.naming import SlideKey
 from Tests.ML.histopathology.datasets.test_slides_dataset import MockSlidesDataset
 
 TEST_IMAGE_PATH = str(tests_root_directory("ML/histopathology/test_data/panda_wsi_example.tiff"))
 
 
+@pytest.mark.skipif(is_windows(), reason="cucim package is not available on Windows")
 def test_load_slide() -> None:
     level = 2
     reader = WSIReader('cuCIM')
+    from cucim import CuImage
     slide_obj: CuImage = reader.read(TEST_IMAGE_PATH)
     dims = slide_obj.resolutions['level_dimensions'][level][::-1]
 
@@ -39,9 +42,11 @@ def test_load_slide() -> None:
     assert np.array_equiv(larger_slide[:, :, dims[1]:], empty_fill_value)
 
 
+@pytest.mark.skipif(is_windows(), reason="cucim package is not available on Windows")
 def test_get_luminance() -> None:
     level = 2  # here we only need to test at a single resolution
     reader = WSIReader('cuCIM')
+    from cucim import CuImage
     slide_obj: CuImage = reader.read(TEST_IMAGE_PATH)
 
     slide = load_slide_at_level(reader, slide_obj, level)
@@ -61,9 +66,11 @@ def test_get_luminance() -> None:
     assert np.array_equal(slide_luminance_tiles.squeeze(1), tiles_luminance)
 
 
+@pytest.mark.skipif(is_windows(), reason="cucim package is not available on Windows")
 def test_segment_foreground() -> None:
     level = 2  # here we only need to test at a single resolution
     reader = WSIReader('cuCIM')
+    from cucim import CuImage
     slide_obj: CuImage = reader.read(TEST_IMAGE_PATH)
     slide = load_slide_at_level(reader, slide_obj, level)
 
@@ -95,11 +102,13 @@ def test_segment_foreground() -> None:
 
 @pytest.mark.parametrize('level', [1, 2])
 @pytest.mark.parametrize('foreground_threshold', [None, 215])
+@pytest.mark.skipif(is_windows(), reason="cucim package is not available on Windows")
 def test_get_bounding_box(level: int, foreground_threshold: Optional[float]) -> None:
     margin = 0
     reader = WSIReader('cuCIM')
     loader = LoadROId(reader, image_key=SlideKey.IMAGE, level=level, margin=margin,
                       foreground_threshold=foreground_threshold)
+    from cucim import CuImage
     slide_obj: CuImage = reader.read(TEST_IMAGE_PATH)
     level0_bbox, _ = loader._get_bounding_box(slide_obj)
 
@@ -130,6 +139,7 @@ def test_get_bounding_box(level: int, foreground_threshold: Optional[float]) ->
 @pytest.mark.parametrize('level', [1, 2])
 @pytest.mark.parametrize('margin', [0, 42])
 @pytest.mark.parametrize('foreground_threshold', [None, 215])
+@pytest.mark.skipif(is_windows(), reason="cucim package is not available on Windows")
 def test_load_roi(level: int, margin: int, foreground_threshold: Optional[float]) -> None:
     dataset = MockSlidesDataset()
     sample = dataset[0]
diff --git a/Tests/ML/histopathology/utils/test_metrics_utils.py b/Tests/ML/histopathology/utils/test_metrics_utils.py
index c226519d8..b1cc4c1ef 100644
--- a/Tests/ML/histopathology/utils/test_metrics_utils.py
+++ b/Tests/ML/histopathology/utils/test_metrics_utils.py
@@ -13,7 +13,9 @@
 from torch.functional import Tensor
 import pytest
 
-from InnerEye.ML.Histopathology.utils.metrics_utils import plot_scores_hist, select_k_tiles, plot_slide, plot_heatmap_overlay, plot_normalized_confusion_matrix
+from InnerEye.Common.common_util import is_windows
+from InnerEye.ML.Histopathology.utils.metrics_utils import plot_scores_hist, select_k_tiles, plot_slide, \
+    plot_heatmap_overlay, plot_normalized_confusion_matrix
 from InnerEye.ML.Histopathology.utils.naming import ResultsKey
 from InnerEye.ML.Histopathology.utils.heatmap_utils import location_selected_tiles
 from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
@@ -44,22 +46,23 @@ def assert_equal_lists(pred: List, expected: List) -> None:
              ResultsKey.PROB: [Tensor([0.5]), Tensor([0.7]), Tensor([0.4]), Tensor([1.0])],
              ResultsKey.TRUE_LABEL: [0, 1, 1, 1],
              ResultsKey.BAG_ATTN:
-                  [Tensor([[0.1, 0.0, 0.2, 0.15]]),
+                 [Tensor([[0.1, 0.0, 0.2, 0.15]]),
                   Tensor([[0.10, 0.18, 0.15, 0.13]]),
                   Tensor([[0.25, 0.23, 0.20, 0.21]]),
                   Tensor([[0.33, 0.31, 0.37, 0.35]])],
              ResultsKey.TILE_X:
-                  [Tensor([200, 200, 424, 424]), 
+                 [Tensor([200, 200, 424, 424]),
+                  Tensor([200, 200, 424, 424]),
                   Tensor([200, 200, 424, 424]),
-                  Tensor([200, 200, 424, 424]), 
                   Tensor([200, 200, 424, 424])],
-             ResultsKey.TILE_Y: 
-                  [Tensor([200, 424, 200, 424]),
+             ResultsKey.TILE_Y:
+                 [Tensor([200, 424, 200, 424]),
+                  Tensor([200, 200, 424, 424]),
                   Tensor([200, 200, 424, 424]),
-                  Tensor([200, 200, 424, 424]), 
                   Tensor([200, 200, 424, 424])]
              }
 
+
 def test_select_k_tiles() -> None:
     top_tn = select_k_tiles(test_dict, n_slides=1, label=0, n_tiles=2, select=('lowest_pred', 'highest_att'))
     assert_equal_lists(top_tn, [(1, 0.5, [3, 4], [Tensor([0.2]), Tensor([0.15])])])
@@ -67,16 +70,24 @@ def test_select_k_tiles() -> None:
     nslides = 2
     ntiles = 2
     top_fn = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles, select=('lowest_pred', 'highest_att'))
-    bottom_fn = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles, select=('lowest_pred', 'lowest_att'))
-    assert_equal_lists(top_fn, [(3, 0.4, [1, 2], [Tensor([0.25]), Tensor([0.23])]), (2, 0.7, [2, 3], [Tensor([0.18]), Tensor([0.15])])])
-    assert_equal_lists(bottom_fn, [(3, 0.4, [3, 4], [Tensor([0.20]), Tensor([0.21])]), (2, 0.7, [1, 4], [Tensor([0.10]), Tensor([0.13])])])
-
-    top_tp = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles, select=('highest_pred', 'highest_att'))
-    bottom_tp = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles, select=('highest_pred', 'lowest_att'))
-    assert_equal_lists(top_tp, [(4, 1.0, [3, 4], [Tensor([0.37]), Tensor([0.35])]), (2, 0.7, [2, 3], [Tensor([0.18]), Tensor([0.15])])])
-    assert_equal_lists(bottom_tp, [(4, 1.0, [2, 1], [Tensor([0.31]), Tensor([0.33])]), (2, 0.7, [1, 4], [Tensor([0.10]), Tensor([0.13])])])
-
-
+    bottom_fn = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles,
+                               select=('lowest_pred', 'lowest_att'))
+    assert_equal_lists(top_fn, [(3, 0.4, [1, 2], [Tensor([0.25]), Tensor([0.23])]),
+                                (2, 0.7, [2, 3], [Tensor([0.18]), Tensor([0.15])])])
+    assert_equal_lists(bottom_fn, [(3, 0.4, [3, 4], [Tensor([0.20]), Tensor([0.21])]),
+                                   (2, 0.7, [1, 4], [Tensor([0.10]), Tensor([0.13])])])
+
+    top_tp = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles,
+                            select=('highest_pred', 'highest_att'))
+    bottom_tp = select_k_tiles(test_dict, n_slides=nslides, label=1, n_tiles=ntiles,
+                               select=('highest_pred', 'lowest_att'))
+    assert_equal_lists(top_tp, [(4, 1.0, [3, 4], [Tensor([0.37]), Tensor([0.35])]),
+                                (2, 0.7, [2, 3], [Tensor([0.18]), Tensor([0.15])])])
+    assert_equal_lists(bottom_tp, [(4, 1.0, [2, 1], [Tensor([0.31]), Tensor([0.33])]),
+                                   (2, 0.7, [1, 4], [Tensor([0.10]), Tensor([0.13])])])
+
+
+@pytest.mark.skipif(is_windows(), reason="Rendering is different on Windows")
 def test_plot_scores_hist(test_output_dirs: OutputFolderForTests) -> None:
     fig = plot_scores_hist(test_dict)
     assert isinstance(fig, matplotlib.figure.Figure)
@@ -104,16 +115,17 @@ def test_plot_slide(test_output_dirs: OutputFolderForTests, scale: int) -> None:
     assert_binary_files_match(file, expected)
 
 
+@pytest.mark.skipif(is_windows(), reason="Rendering is different on Windows")
 def test_plot_heatmap_overlay(test_output_dirs: OutputFolderForTests) -> None:
     set_random_seed(0)
     slide_image = np.random.rand(3, 1000, 2000)
     location_bbox = [100, 100]
-    slide = 1 
+    slide = 1
     tile_size = 224
     level = 0
-    fig = plot_heatmap_overlay(slide=slide,                                             # type: ignore
+    fig = plot_heatmap_overlay(slide=slide,  # type: ignore
                                slide_image=slide_image,
-                               results=test_dict,                                       # type: ignore
+                               results=test_dict,  # type: ignore
                                location_bbox=location_bbox,
                                tile_size=tile_size,
                                level=level)
@@ -128,15 +140,16 @@ def test_plot_heatmap_overlay(test_output_dirs: OutputFolderForTests) -> None:
 
 
 @pytest.mark.parametrize("n_classes", [1, 3])
+@pytest.mark.skipif(is_windows(), reason="Rendering is different on Windows")
 def test_plot_normalized_confusion_matrix(test_output_dirs: OutputFolderForTests, n_classes: int) -> None:
     set_random_seed(0)
     if n_classes > 1:
         cm = np.random.randint(1, 1000, size=(n_classes, n_classes))
         class_names = [str(i) for i in range(n_classes)]
     else:
-        cm = np.random.randint(1, 1000, size=(n_classes+1, n_classes+1))
-        class_names = [str(i) for i in range(n_classes+1)]
-    cm_n = cm/cm.sum(axis=1, keepdims=True)
+        cm = np.random.randint(1, 1000, size=(n_classes + 1, n_classes + 1))
+        class_names = [str(i) for i in range(n_classes + 1)]
+    cm_n = cm / cm.sum(axis=1, keepdims=True)
     assert (cm_n <= 1).all()
 
     fig = plot_normalized_confusion_matrix(cm=cm_n, class_names=class_names)
@@ -153,26 +166,27 @@ def test_plot_normalized_confusion_matrix(test_output_dirs: OutputFolderForTests
 @pytest.mark.parametrize("level", [0, 1, 2])
 def test_location_selected_tiles(level: int) -> None:
     set_random_seed(0)
-    slide = 1 
+    slide = 1
     location_bbox = [100, 100]
     slide_image = np.random.rand(3, 1000, 2000)
 
     coords = []
-    slide_ids = [item[0] for item in test_dict[ResultsKey.SLIDE_ID]]                                            # type: ignore
+    slide_ids = [item[0] for item in test_dict[ResultsKey.SLIDE_ID]]  # type: ignore
     slide_idx = slide_ids.index(slide)
-    for tile_idx in range(len(test_dict[ResultsKey.IMAGE_PATH][slide_idx])):                                    # type: ignore
-        tile_coords = np.transpose(np.array([test_dict[ResultsKey.TILE_X][slide_idx][tile_idx].cpu().numpy(),   # type: ignore
-                                    test_dict[ResultsKey.TILE_Y][slide_idx][tile_idx].cpu().numpy()]))          # type: ignore
+    for tile_idx in range(len(test_dict[ResultsKey.IMAGE_PATH][slide_idx])):  # type: ignore
+        tile_coords = np.transpose(
+            np.array([test_dict[ResultsKey.TILE_X][slide_idx][tile_idx].cpu().numpy(),  # type: ignore
+                      test_dict[ResultsKey.TILE_Y][slide_idx][tile_idx].cpu().numpy()]))  # type: ignore
         coords.append(tile_coords)
 
     coords = np.array(coords)
-    tile_coords_transformed = location_selected_tiles(tile_coords=coords, 
-                                                          location_bbox=location_bbox,
-                                                          level=level)
+    tile_coords_transformed = location_selected_tiles(tile_coords=coords,
+                                                      location_bbox=location_bbox,
+                                                      level=level)
     tile_xs, tile_ys = tile_coords_transformed.T
     level_dict = {0: 1, 1: 4, 2: 16}
     factor = level_dict[level]
-    assert min(tile_xs) >= 0 
-    assert max(tile_xs) <= slide_image.shape[2]//factor
-    assert min(tile_ys) >= 0 
-    assert max(tile_ys) <= slide_image.shape[1]//factor
+    assert min(tile_xs) >= 0
+    assert max(tile_xs) <= slide_image.shape[2] // factor
+    assert min(tile_ys) >= 0
+    assert max(tile_ys) <= slide_image.shape[1] // factor
diff --git a/Tests/SSL/test_ssl_containers.py b/Tests/SSL/test_ssl_containers.py
index 1a55a53e6..49569378b 100644
--- a/Tests/SSL/test_ssl_containers.py
+++ b/Tests/SSL/test_ssl_containers.py
@@ -634,7 +634,24 @@ def test_simclr_dataloader_type() -> None:
     """ This test checks if the transform pipeline of a SSL job can handle different
     data types coming from the dataloader.
     """
-    def check_types_in_dataloader(dataloader: CombinedLoader) -> None:
+    # TODO: Once the pytorch lightning bug is fixed the following test can be removed.
+    # The training and val loader will be both CombinedLoaders
+    def check_types_in_train_dataloader(dataloader: dict) -> None:
+        for i, batch in enumerate(dataloader[SSLDataModuleType.ENCODER]):
+            assert isinstance(batch[0][0], torch.Tensor)
+            assert isinstance(batch[0][1], torch.Tensor)
+            assert isinstance(batch[1], torch.Tensor)
+            if i == 1:
+                break
+
+        for i, batch in enumerate(dataloader[SSLDataModuleType.LINEAR_HEAD]):
+            assert isinstance(batch[0], torch.Tensor)
+            assert isinstance(batch[1], torch.Tensor)
+            assert isinstance(batch[2], torch.Tensor)
+            if i == 1:
+                break
+
+    def check_types_in_val_dataloader(dataloader: CombinedLoader) -> None:
         for i, batch in enumerate(dataloader):
             assert isinstance(batch[SSLDataModuleType.ENCODER][0][0], torch.Tensor)
             assert isinstance(batch[SSLDataModuleType.ENCODER][0][1], torch.Tensor)
@@ -646,8 +663,8 @@ def check_types_in_dataloader(dataloader: CombinedLoader) -> None:
                 break
 
     def check_types_in_train_and_val(data: CombinedDataModule) -> None:
-        check_types_in_dataloader(data.train_dataloader())
-        check_types_in_dataloader(data.val_dataloader())
+        check_types_in_train_dataloader(data.train_dataloader())
+        check_types_in_val_dataloader(data.val_dataloader())
 
     container = DummySimCLR()
     container.setup()
diff --git a/azure-pipelines/build-pr.yml b/azure-pipelines/build-pr.yml
index da6454f42..1294d6a10 100644
--- a/azure-pipelines/build-pr.yml
+++ b/azure-pipelines/build-pr.yml
@@ -23,15 +23,29 @@ jobs:
     steps:
       - template: cancel_aml_jobs.yml
 
-  - job: Windows
+  - job: CredScan_ComponentGov
     pool:
       vmImage: 'windows-2019'
     steps:
       - template: build_windows.yaml
 
-  - job: Linux
+  # Jobs that only build the Conda environment. They are very likely to succeed and therefore to fill the build
+  # cache; the PyTest and other legs only fill the cache when they themselves succeed.
+  - job: CreateCondaEnvAndCache_Windows
     pool:
-      vmImage: 'ubuntu-20.04'
+      vmImage: 'windows-2019'
+    steps:
+      - template: inner_eye_env.yml
+
+  - job: CreateCondaEnvAndCache_Linux
+    pool:
+      vmImage: 'ubuntu-18.04'
+    steps:
+      - template: inner_eye_env.yml
+
+  - job: PyTest
+    pool:
+      vmImage: 'windows-2019'
     steps:
       - template: build.yaml
 
diff --git a/azure-pipelines/inner_eye_env.yml b/azure-pipelines/inner_eye_env.yml
index 49020f994..8b225bea3 100644
--- a/azure-pipelines/inner_eye_env.yml
+++ b/azure-pipelines/inner_eye_env.yml
@@ -5,23 +5,31 @@ steps:
 
   - template: prepare_conda.yml
 
+  - bash: echo "##vso[task.setvariable variable=conda_env_dir]/usr/share/miniconda/envs"
+    displayName: "Set the Conda environment folder (Linux)"
+    condition: eq(variables['Agent.OS'], 'Linux')
+
+  - bash: echo "##vso[task.setvariable variable=conda_env_dir]C:/Miniconda/envs"
+    displayName: "Set the Conda environment folder (Windows)"
+    condition: eq(variables['Agent.OS'], 'Windows_NT')
+
+  - bash: echo $(conda_env_dir)
+    displayName: 'Printing Conda environment folder'
+
   # https://docs.microsoft.com/en-us/azure/devops/pipelines/release/caching?view=azure-devops#pythonanaconda
   - task: Cache@2
     displayName: Use cached Conda environment
     inputs:
       # Beware of changing the cache key or path independently, safest to change in sync
-      key: 'usr_share_miniconda_envs | "$(Agent.OS)" | environment.yml'
-      path: /usr/share/miniconda/envs
+      key: 'conda_env | "$(Agent.OS)" | environment.yml'
       cacheHitVar: CONDA_CACHE_RESTORED
+      path: $(conda_env_dir)
 
-  - script: conda env create --file environment.yml
+  - bash: conda env create --file environment.yml
     displayName: Create Anaconda environment
     failOnStderr: false # Conda env create does not have an option to suppress warnings generated in wheel.py
     condition: eq(variables.CONDA_CACHE_RESTORED, 'false')
 
-  - script: source activate InnerEye
-    displayName: Check if InnerEye is present
-
   - bash: |
       source activate InnerEye
       which python
@@ -29,3 +37,7 @@ steps:
       pip freeze
     failOnStderr: false
     displayName: Print package list and Conda info
+    condition: succeededOrFailed()
+
+  - bash: source activate InnerEye
+    displayName: Check if InnerEye environment is present
diff --git a/azure-pipelines/prepare_conda.yml b/azure-pipelines/prepare_conda.yml
index 55b8e27c5..634c2aa10 100644
--- a/azure-pipelines/prepare_conda.yml
+++ b/azure-pipelines/prepare_conda.yml
@@ -1,6 +1,9 @@
 steps:
   - bash: |
-      subdir=bin
+      if [ "$(Agent.OS)" = 'Windows_NT' ]
+      then subdir=Scripts  # Windows agents: the PATH entry becomes $CONDA/Scripts
+      else subdir=bin
+      fi
       echo "Adding this directory to PATH: $CONDA/$subdir"
       echo "##vso[task.prependpath]$CONDA/$subdir"
     displayName: Add conda to PATH
@@ -9,4 +12,4 @@ steps:
   - bash: |
       sudo chown -R $USER /usr/share/miniconda
     condition: and(succeeded(), eq( variables['Agent.OS'], 'Linux' ))
-    displayName: Take ownership of conda installation
+    displayName: Take ownership of conda installation (Linux only)