diff --git a/CHANGELOG.md b/CHANGELOG.md
index aa0c76e0a..de0ae157a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,6 @@ created.
 ## Upcoming
 
 ### Added
-- ([#649](https://github.com/microsoft/InnerEye-DeepLearning/pull/649)) Fix for the _convert_to_tensor_if_necessary method so that PIL.Image as well as np.array get converted to torch.Tensor.
 - ([#643](https://github.com/microsoft/InnerEye-DeepLearning/pull/643)) Test for recovery of SSL job. Tracks learning rate and train
 loss.
 - ([#594](https://github.com/microsoft/InnerEye-DeepLearning/pull/594)) When supplying a "--tag" argument, the AzureML jobs use that value as the display name, to more easily distinguish run.
@@ -51,6 +50,7 @@ jobs that run in AzureML.
 - ([#650](https://github.com/microsoft/InnerEye-DeepLearning/pull/650)) Enable fine-tuning in DeepMIL using PANDA as the classification task.
 
 ### Changed
+- ([#659](https://github.com/microsoft/InnerEye-DeepLearning/pull/659)) Update cudatoolkit version from 11.1 to 11.3.
 - ([#588](https://github.com/microsoft/InnerEye-DeepLearning/pull/588)) Replace SciPy with PIL.PngImagePlugin.PngImageFile to load png files.
 - ([#585](https://github.com/microsoft/InnerEye-DeepLearning/pull/585)) Switching to PyTorch 1.10.0 and torchvision 0.11.1
 - ([#576](https://github.com/microsoft/InnerEye-DeepLearning/pull/576)) The console output is no longer written to stdout.txt because AzureML handles that better now
@@ -87,6 +87,8 @@ gets uploaded to AzureML, by skipping all test folders.
 - ([#632](https://github.com/microsoft/InnerEye-DeepLearning/pull/632)) Nifti test data is no longer stored in Git LFS
 
 ### Fixed
+- ([#659](https://github.com/microsoft/InnerEye-DeepLearning/pull/659)) Fix caching and checkpointing for TCGA CRCk dataset.
+- ([#649](https://github.com/microsoft/InnerEye-DeepLearning/pull/649)) Fix for the _convert_to_tensor_if_necessary method so that PIL.Image as well as np.array get converted to torch.Tensor.
 - ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
 - ([#646](https://github.com/microsoft/InnerEye-DeepLearning/pull/646)) Workaround for bug in PL: CombinedLoader cannot be used for training data when using DDP
 - ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
index 01384469b..78d0bd1f1 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
@@ -13,22 +13,25 @@
 - Schirris (2021). DeepSMILE: Self-supervised heterogeneity-aware multiple instance learning for DNA
 damage response defect classification directly from H&E whole-slide images. arXiv:2107.09405
 """
-from pathlib import Path
 from typing import Any, List
+from pathlib import Path
 import os
-
 from monai.transforms import Compose
 from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
 from pytorch_lightning.callbacks import Callback
 
-from health_ml.networks.layers.attention_layers import GatedAttentionLayer
-from health_azure.utils import get_workspace
 from health_azure.utils import CheckpointDownloader
+from health_azure.utils import get_workspace
+from health_ml.networks.layers.attention_layers import AttentionLayer
 from InnerEye.Common import fixed_paths
-from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
+from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, CacheLocation
 from InnerEye.ML.Histopathology.datamodules.base_module import TilesDataModule
-from InnerEye.ML.Histopathology.datamodules.tcga_crck_module import (
-    TcgaCrckTilesDataModule,
+from InnerEye.ML.Histopathology.datamodules.tcga_crck_module import TcgaCrckTilesDataModule
+from InnerEye.ML.common import get_best_checkpoint_path
+
+from InnerEye.ML.Histopathology.models.transforms import (
+    EncodeTilesBatchd,
+    LoadTilesBatchd,
 )
 from InnerEye.ML.Histopathology.models.encoders import (
     HistoSSLEncoder,
@@ -36,13 +39,8 @@
     ImageNetSimCLREncoder,
     InnerEyeSSLEncoder,
 )
-from InnerEye.ML.Histopathology.models.transforms import (
-    EncodeTilesBatchd,
-    LoadTilesBatchd,
-)
-from InnerEye.ML.Histopathology.datasets.tcga_crck_tiles_dataset import (
-    TcgaCrck_TilesDataset,
-)
+from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
+from InnerEye.ML.Histopathology.datasets.tcga_crck_tiles_dataset import TcgaCrck_TilesDataset
 
 
 class DeepSMILECrck(BaseMIL):
@@ -50,13 +48,16 @@ def __init__(self, **kwargs: Any) -> None:
         # Define dictionary with default params that can be overriden from subclasses or CLI
         default_kwargs = dict(
             # declared in BaseMIL:
-            pooling_type=GatedAttentionLayer.__name__,
+            pooling_type=AttentionLayer.__name__,
+            encoding_chunk_size=60,
+            cache_mode=CacheMode.MEMORY,
+            precache_location=CacheLocation.CPU,
             # declared in DatasetParams:
             local_dataset=Path("/tmp/datasets/TCGA-CRCk"),
             azure_dataset_id="TCGA-CRCk",
             # To mount the dataset instead of downloading in AML, pass --use_dataset_mount in the CLI
             # declared in TrainerParams:
-            num_epochs=16,
+            num_epochs=50,
             # declared in WorkflowParams:
             number_of_cross_validation_splits=5,
             cross_validation_split_index=0,
@@ -120,7 +121,7 @@ def get_data_module(self) -> TilesDataModule:
             batch_size=self.batch_size,
             transform=transform,
             cache_mode=self.cache_mode,
-            save_precache=self.save_precache,
+            precache_location=self.precache_location,
             cache_dir=self.cache_dir,
             number_of_cross_validation_splits=self.number_of_cross_validation_splits,
             cross_validation_split_index=self.cross_validation_split_index,
@@ -135,11 +136,23 @@ def get_path_to_best_checkpoint(self) -> Path:
         was applied there.
         """
         # absolute path is required for registering the model.
-        return (
-            fixed_paths.repository_root_directory()
-            / self.checkpoint_folder_path
-            / self.best_checkpoint_filename_with_suffix
-        )
+        absolute_checkpoint_path = Path(fixed_paths.repository_root_directory(),
+                                        self.checkpoint_folder_path,
+                                        self.best_checkpoint_filename_with_suffix)
+        if absolute_checkpoint_path.is_file():
+            return absolute_checkpoint_path
+
+        absolute_checkpoint_path_parent = Path(fixed_paths.repository_parent_directory(),
+                                    self.checkpoint_folder_path,
+                                    self.best_checkpoint_filename_with_suffix)
+        if absolute_checkpoint_path_parent.is_file():
+            return absolute_checkpoint_path_parent
+
+        checkpoint_path = get_best_checkpoint_path(Path(self.checkpoint_folder_path))
+        if checkpoint_path.is_file():
+            return checkpoint_path
+
+        raise ValueError("Path to best checkpoint not found")
 
 
 class TcgaCrckImageNetMIL(DeepSMILECrck):
diff --git a/azure-pipelines/build-pr.yml b/azure-pipelines/build-pr.yml
index 86d225103..887871572 100644
--- a/azure-pipelines/build-pr.yml
+++ b/azure-pipelines/build-pr.yml
@@ -122,7 +122,7 @@ jobs:
       - name: tag
         value: 'TrainEnsemble'
       - name: more_switches
-        value: '--pl_deterministic --log_level=DEBUG --regression_test_folder=RegressionTestResults/PR_TrainEnsemble'
+        value: '--pl_deterministic --log_level=DEBUG --regression_test_folder=RegressionTestResults/PR_TrainEnsemble  --regression_test_csv_tolerance=1e-5'
     pool:
       vmImage: 'ubuntu-20.04'
     steps:
diff --git a/environment.yml b/environment.yml
index 374f17b3c..bd4456ed4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,7 +4,7 @@ channels:
   - pytorch
   - conda-forge
 dependencies:
-  - cudatoolkit=11.1
+  - cudatoolkit=11.3
   - pip=20.1.1
   - python=3.7.3
   - pytorch=1.10.0
@@ -28,6 +28,7 @@ dependencies:
       - gputil==1.4.0
       - h5py==2.10.0
       - ipython==7.31.1
+      - imageio==2.15.0
       - InnerEye-DICOM-RT==1.0.1
       - joblib==0.16.0
       - jupyter==1.0.0