diff --git a/.github/unittest/linux_libs/scripts_ataridqn/environment.yml b/.github/unittest/linux_libs/scripts_ataridqn/environment.yml
new file mode 100644
index 00000000000..b88860dddde
--- /dev/null
+++ b/.github/unittest/linux_libs/scripts_ataridqn/environment.yml
@@ -0,0 +1,25 @@
+channels:
+  - pytorch
+  - defaults
+  - conda-forge
+dependencies:  # setup_env.sh appends a "- python=<version>" entry to the end of this file
+  - pip
+  - gsutil
+  - pip:  # the packages below are installed with pip inside the conda env
+    - hypothesis
+    - future
+    - cloudpickle
+    - pytest
+    - pytest-cov
+    - pytest-mock
+    - pytest-instafail
+    - pytest-rerunfailures
+    - pytest-error-for-skips
+    - expecttest
+    - pyyaml
+    - scipy
+    - hydra-core
+    - tqdm
+    - h5py
+    - datasets
+    - pillow
diff --git a/.github/unittest/linux_libs/scripts_ataridqn/install.sh b/.github/unittest/linux_libs/scripts_ataridqn/install.sh
new file mode 100755
index 00000000000..1be476425a6
--- /dev/null
+++ b/.github/unittest/linux_libs/scripts_ataridqn/install.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+unset PYTORCH_VERSION
+# The unit tests run against nightly PyTorch (installed further down),
+# so there is no need to set PYTORCH_VERSION.
+# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config.
+apt-get update && apt-get install -y git wget gcc g++
+#apt-get update && apt-get install -y git wget freeglut3 freeglut3-dev
+
+set -e  # from here on, abort on the first failing command
+
+eval "$(./conda/bin/conda shell.bash hook)"
+conda activate ./env
+
+if [ "${CU_VERSION:-}" == cpu ] ; then
+    version="cpu"
+else
+    if [[ ${#CU_VERSION} -eq 4 ]]; then
+        CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"  # e.g. cu92 -> 9.2
+    elif [[ ${#CU_VERSION} -eq 5 ]]; then
+        CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"  # e.g. cu117 -> 11.7
+    fi
+    echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
+    version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
+fi
+
+# NOTE(review): "version" is computed above but not read again in this script.
+# submodules
+git submodule sync && git submodule update --init --recursive
+
+printf "Installing PyTorch with %s\n" "${CU_VERSION}"
+if [ "${CU_VERSION:-}" == cpu ] ; then
+    # conda install -y pytorch torchvision cpuonly -c pytorch-nightly
+    # use pip to install pytorch as conda can frequently pick older release
+#    conda install -y pytorch cpuonly -c pytorch-nightly
+    pip3 install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
+else
+    pip3 install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121  # NOTE(review): cu121 is hardcoded; CU_VERSION only selects cpu vs. CUDA here
+fi
+
+# install tensordict from the head of its git repository
+pip install git+https://github.com/pytorch/tensordict.git
+
+# smoke test: both packages must import before torchrl is built
+python -c "import functorch;import tensordict"
+
+printf "* Installing torchrl\n"
+python setup.py develop
+
+# smoke test: the freshly installed torchrl must import
+python -c "import torchrl"
diff --git a/.github/unittest/linux_libs/scripts_ataridqn/post_process.sh b/.github/unittest/linux_libs/scripts_ataridqn/post_process.sh
new file mode 100755
index 00000000000..e97bf2a7b1b
--- /dev/null
+++ b/.github/unittest/linux_libs/scripts_ataridqn/post_process.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+set -e  # abort on the first failing command
+
+eval "$(./conda/bin/conda shell.bash hook)"
+conda activate ./env  # the script only activates the env; no further commands follow
diff --git a/.github/unittest/linux_libs/scripts_ataridqn/run-clang-format.py b/.github/unittest/linux_libs/scripts_ataridqn/run-clang-format.py
new file mode 100755
index 00000000000..5783a885d86
--- /dev/null
+++ b/.github/unittest/linux_libs/scripts_ataridqn/run-clang-format.py
@@ -0,0 +1,356 @@
+#!/usr/bin/env python
+"""
+MIT License
+
+Copyright (c) 2017 Guillaume Papin
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+A wrapper script around clang-format, suitable for linting multiple files
+and to use for continuous integration.
+
+This is an alternative API for the clang-format command line.
+It runs over multiple files and directories in parallel.
+A diff output is produced and a sensible exit code is returned.
+
+"""
+
+import argparse
+import difflib
+import fnmatch
+import multiprocessing
+import os
+import signal
+import subprocess
+import sys
+import traceback
+from functools import partial
+
+try:
+    from subprocess import DEVNULL  # py3k
+except ImportError:
+    DEVNULL = open(os.devnull, "wb")
+
+
+DEFAULT_EXTENSIONS = "c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu"
+
+
+class ExitStatus:  # exit codes that main() hands to sys.exit()
+    SUCCESS = 0  # initial status; kept when no file produced a diff
+    DIFF = 1  # at least one file differs from clang-format's output
+    TROUBLE = 2  # clang-format or a file could not be processed
+
+
+def list_files(files, recursive=False, extensions=None, exclude=None):
+    if extensions is None:
+        extensions = []
+    if exclude is None:
+        exclude = []
+
+    out = []
+    for file in files:
+        if recursive and os.path.isdir(file):
+            for dirpath, dnames, fnames in os.walk(file):
+                fpaths = [os.path.join(dirpath, fname) for fname in fnames]
+                for pattern in exclude:
+                    # os.walk() supports trimming down the dnames list
+                    # by modifying it in-place,
+                    # to avoid unnecessary directory listings.
+                    dnames[:] = [
+                        x
+                        for x in dnames
+                        if not fnmatch.fnmatch(os.path.join(dirpath, x), pattern)
+                    ]
+                    fpaths = [x for x in fpaths if not fnmatch.fnmatch(x, pattern)]
+                for f in fpaths:
+                    ext = os.path.splitext(f)[1][1:]
+                    if ext in extensions:
+                        out.append(f)
+        else:
+            out.append(file)
+    return out
+
+
+def make_diff(file, original, reformatted):
+    return list(
+        difflib.unified_diff(
+            original,
+            reformatted,
+            fromfile=f"{file}\t(original)",
+            tofile=f"{file}\t(reformatted)",
+            n=3,
+        )
+    )
+
+
+class DiffError(Exception):
+    def __init__(self, message, errs=None):
+        super().__init__(message)
+        self.errs = errs or []
+
+
+class UnexpectedError(Exception):
+    def __init__(self, message, exc=None):
+        super().__init__(message)
+        self.formatted_traceback = traceback.format_exc()
+        self.exc = exc
+
+
+def run_clang_format_diff_wrapper(args, file):
+    try:
+        ret = run_clang_format_diff(args, file)
+        return ret
+    except DiffError:
+        raise
+    except Exception as e:
+        raise UnexpectedError(f"{file}: {e.__class__.__name__}: {e}", e)
+
+
+def run_clang_format_diff(args, file):
+    try:
+        with open(file, encoding="utf-8") as f:
+            original = f.readlines()
+    except OSError as exc:
+        raise DiffError(str(exc))
+    invocation = [args.clang_format_executable, file]
+
+    # Use of utf-8 to decode the process output.
+    #
+    # Hopefully, this is the correct thing to do.
+    #
+    # It's done due to the following assumptions (which may be incorrect):
+    # - clang-format will returns the bytes read from the files as-is,
+    #   without conversion, and it is already assumed that the files use utf-8.
+    # - if the diagnostics were internationalized, they would use utf-8:
+    #   > Adding Translations to Clang
+    #   >
+    #   > Not possible yet!
+    #   > Diagnostic strings should be written in UTF-8,
+    #   > the client can translate to the relevant code page if needed.
+    #   > Each translation completely replaces the format string
+    #   > for the diagnostic.
+    #   > -- http://clang.llvm.org/docs/InternalsManual.html#internals-diag-translation
+
+    try:
+        proc = subprocess.Popen(
+            invocation,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
+            encoding="utf-8",
+        )
+    except OSError as exc:
+        raise DiffError(
+            f"Command '{subprocess.list2cmdline(invocation)}' failed to start: {exc}"
+        )
+    proc_stdout = proc.stdout
+    proc_stderr = proc.stderr
+
+    # hopefully the stderr pipe won't get full and block the process
+    outs = list(proc_stdout.readlines())
+    errs = list(proc_stderr.readlines())
+    proc.wait()
+    if proc.returncode:
+        raise DiffError(
+            "Command '{}' returned non-zero exit status {}".format(
+                subprocess.list2cmdline(invocation), proc.returncode
+            ),
+            errs,
+        )
+    return make_diff(file, original, outs), errs
+
+
+def bold_red(s):
+    return "\x1b[1m\x1b[31m" + s + "\x1b[0m"
+
+
+def colorize(diff_lines):
+    def bold(s):
+        return "\x1b[1m" + s + "\x1b[0m"
+
+    def cyan(s):
+        return "\x1b[36m" + s + "\x1b[0m"
+
+    def green(s):
+        return "\x1b[32m" + s + "\x1b[0m"
+
+    def red(s):
+        return "\x1b[31m" + s + "\x1b[0m"
+
+    for line in diff_lines:
+        if line[:4] in ["--- ", "+++ "]:
+            yield bold(line)
+        elif line.startswith("@@ "):
+            yield cyan(line)
+        elif line.startswith("+"):
+            yield green(line)
+        elif line.startswith("-"):
+            yield red(line)
+        else:
+            yield line
+
+
+def print_diff(diff_lines, use_color):
+    if use_color:
+        diff_lines = colorize(diff_lines)
+    sys.stdout.writelines(diff_lines)
+
+
+def print_trouble(prog, message, use_colors):
+    error_text = "error:"
+    if use_colors:
+        error_text = bold_red(error_text)
+    print(f"{prog}: {error_text} {message}", file=sys.stderr)
+
+
+def main():  # CLI entry point; returns an ExitStatus value (None when no files are found)
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--clang-format-executable",
+        metavar="EXECUTABLE",
+        help="path to the clang-format executable",
+        default="clang-format",
+    )
+    parser.add_argument(
+        "--extensions",
+        help=f"comma separated list of file extensions (default: {DEFAULT_EXTENSIONS})",
+        default=DEFAULT_EXTENSIONS,
+    )
+    parser.add_argument(
+        "-r",
+        "--recursive",
+        action="store_true",
+        help="run recursively over directories",
+    )
+    parser.add_argument("files", metavar="file", nargs="+")
+    parser.add_argument("-q", "--quiet", action="store_true")
+    parser.add_argument(
+        "-j",
+        metavar="N",
+        type=int,
+        default=0,  # 0 is replaced by "cpu count + 1" further down
+        help="run N clang-format jobs in parallel (default number of cpus + 1)",
+    )
+    parser.add_argument(
+        "--color",
+        default="auto",
+        choices=["auto", "always", "never"],
+        help="show colored diff (default: auto)",
+    )
+    parser.add_argument(
+        "-e",
+        "--exclude",
+        metavar="PATTERN",
+        action="append",
+        default=[],
+        help="exclude paths matching the given glob-like pattern(s) from recursive search",
+    )
+
+    args = parser.parse_args()
+
+    # use default signal handling, like diff return SIGINT value on ^C
+    # https://bugs.python.org/issue14229#msg156446
+    signal.signal(signal.SIGINT, signal.SIG_DFL)
+    try:
+        signal.SIGPIPE
+    except AttributeError:
+        # compatibility, SIGPIPE does not exist on Windows
+        pass
+    else:
+        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+    colored_stdout = False
+    colored_stderr = False
+    if args.color == "always":
+        colored_stdout = True
+        colored_stderr = True
+    elif args.color == "auto":  # color each stream only when it is a terminal
+        colored_stdout = sys.stdout.isatty()
+        colored_stderr = sys.stderr.isatty()
+
+    version_invocation = [args.clang_format_executable, "--version"]  # checks the executable can run
+    try:
+        subprocess.check_call(version_invocation, stdout=DEVNULL)
+    except subprocess.CalledProcessError as e:
+        print_trouble(parser.prog, str(e), use_colors=colored_stderr)
+        return ExitStatus.TROUBLE
+    except OSError as e:
+        print_trouble(
+            parser.prog,
+            f"Command '{subprocess.list2cmdline(version_invocation)}' failed to start: {e}",
+            use_colors=colored_stderr,
+        )
+        return ExitStatus.TROUBLE
+
+    retcode = ExitStatus.SUCCESS
+    files = list_files(
+        args.files,
+        recursive=args.recursive,
+        exclude=args.exclude,
+        extensions=args.extensions.split(","),
+    )
+
+    if not files:
+        return  # implicit None; sys.exit(None) exits with status 0
+
+    njobs = args.j
+    if njobs == 0:
+        njobs = multiprocessing.cpu_count() + 1
+    njobs = min(len(files), njobs)  # never more workers than files
+
+    if njobs == 1:
+        # execute directly instead of in a pool,
+        # less overhead, simpler stacktraces
+        it = (run_clang_format_diff_wrapper(args, file) for file in files)
+        pool = None
+    else:
+        pool = multiprocessing.Pool(njobs)
+        it = pool.imap_unordered(partial(run_clang_format_diff_wrapper, args), files)
+    while True:
+        try:
+            outs, errs = next(it)  # an exception raised for a file surfaces here
+        except StopIteration:
+            break
+        except DiffError as e:
+            print_trouble(parser.prog, str(e), use_colors=colored_stderr)
+            retcode = ExitStatus.TROUBLE
+            sys.stderr.writelines(e.errs)
+        except UnexpectedError as e:
+            print_trouble(parser.prog, str(e), use_colors=colored_stderr)
+            sys.stderr.write(e.formatted_traceback)
+            retcode = ExitStatus.TROUBLE
+            # stop at the first unexpected error,
+            # something could be very wrong,
+            # don't process all files unnecessarily
+            if pool:
+                pool.terminate()
+            break
+        else:
+            sys.stderr.writelines(errs)
+            if outs == []:  # empty diff: nothing to report for this file
+                continue
+            if not args.quiet:
+                print_diff(outs, use_color=colored_stdout)
+            if retcode == ExitStatus.SUCCESS:  # do not downgrade an earlier TROUBLE
+                retcode = ExitStatus.DIFF
+    return retcode
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/unittest/linux_libs/scripts_ataridqn/run_test.sh b/.github/unittest/linux_libs/scripts_ataridqn/run_test.sh
new file mode 100755
index 00000000000..ee7bf9b46b1
--- /dev/null
+++ b/.github/unittest/linux_libs/scripts_ataridqn/run_test.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Runs the TestAtariDQN tests from test/test_libs.py under coverage.
+set -e
+
+eval "$(./conda/bin/conda shell.bash hook)"
+conda activate ./env
+# Swap the "swig" package for swig3.0 and expose it as /usr/bin/swig.
+apt-get update && apt-get remove swig -y && apt-get install -y git gcc patchelf libosmesa6-dev libgl1-mesa-glx libglfw3 swig3.0
+ln -sf /usr/bin/swig3.0 /usr/bin/swig  # -f: do not abort (set -e) if the link already exists
+
+export PYTORCH_TEST_WITH_SLOW='1'
+python -m torch.utils.collect_env
+# Avoid error: "fatal: unsafe repository"
+git config --global --add safe.directory '*'
+
+root_dir="$(git rev-parse --show-toplevel)"
+env_dir="${root_dir}/env"
+lib_dir="${env_dir}/lib"
+
+conda deactivate && conda activate ./env
+
+python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 --capture no -k TestAtariDQN --error-for-skips --runslow
+coverage combine
+coverage xml -i
diff --git a/.github/unittest/linux_libs/scripts_ataridqn/setup_env.sh b/.github/unittest/linux_libs/scripts_ataridqn/setup_env.sh
new file mode 100755
index 00000000000..5b415112814
--- /dev/null
+++ b/.github/unittest/linux_libs/scripts_ataridqn/setup_env.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# This script sets up the environment in which the unit tests are run.
+# To speed up the CI time, the resulting environment is cached.
+#
+# Do not install PyTorch and torchvision here, otherwise they also get cached.
+
+set -e
+set -v
+
+this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+apt-get update && apt-get install -y git wget gcc g++ unzip curl
+
+# Avoid error: "fatal: unsafe repository"
+git config --global --add safe.directory '*'
+root_dir="$(git rev-parse --show-toplevel)"
+conda_dir="${root_dir}/conda"
+env_dir="${root_dir}/env"
+
+cd "${root_dir}"
+
+case "$(uname -s)" in
+    Darwin*) os=MacOSX;;
+    *) os=Linux;;
+esac
+
+# 1. Install conda at ./conda (the installer is executed, so fetch it over HTTPS)
+if [ ! -d "${conda_dir}" ]; then
+    printf "* Installing conda\n"
+    wget -O miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-${os}-x86_64.sh"
+    bash ./miniconda.sh -b -f -p "${conda_dir}"
+fi
+eval "$("${conda_dir}/bin/conda" shell.bash hook)"
+
+# 2. Create test environment at ./env
+printf "python: %s\n" "${PYTHON_VERSION}"
+if [ ! -d "${env_dir}" ]; then
+    printf "* Creating a test environment\n"
+    conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
+fi
+conda activate "${env_dir}"
+
+# 3. Install Conda dependencies (a python pin is appended to the dependency list first)
+printf "* Installing dependencies (except PyTorch)\n"
+echo "  - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml"
+cat "${this_dir}/environment.yml"
+
+pip3 install pip --upgrade
+
+conda env update --file "${this_dir}/environment.yml" --prune
diff --git a/.github/workflows/test-linux-libs.yml b/.github/workflows/test-linux-libs.yml
index 3b090582e4f..abf78e5e19c 100644
--- a/.github/workflows/test-linux-libs.yml
+++ b/.github/workflows/test-linux-libs.yml
@@ -16,6 +16,32 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+
+  unittests-atari-dqn:
+    strategy:
+      matrix:  # NOTE(review): the matrix values are not referenced by this job's script
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Data') }}
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      repository: pytorch/rl
+      runner: "linux.g5.4xlarge.nvidia.gpu"
+      docker-image: "nvidia/cudagl:11.4.0-base"
+      timeout: 120
+      script: |
+        set -euo pipefail
+        export PYTHON_VERSION="3.9"
+        export CU_VERSION="cu117"  # NOTE(review): install.sh installs the cu121 nightly wheels for any non-cpu value
+        export TAR_OPTIONS="--no-same-owner"
+        export UPLOAD_CHANNEL="nightly"
+        export TF_CPP_MIN_LOG_LEVEL=0
+
+        bash .github/unittest/linux_libs/scripts_ataridqn/setup_env.sh
+        bash .github/unittest/linux_libs/scripts_ataridqn/install.sh
+        bash .github/unittest/linux_libs/scripts_ataridqn/run_test.sh
+        bash .github/unittest/linux_libs/scripts_ataridqn/post_process.sh
+
   unittests-brax:
     strategy:
       matrix:
diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst
index 90dbe4f3d4e..2def1b4bfa8 100644
--- a/docs/source/reference/data.rst
+++ b/docs/source/reference/data.rst
@@ -280,7 +280,7 @@ Here's an example:
     :toctree: generated/
     :template: rl_template.rst
 
-
+    AtariDQNExperienceReplay
     D4RLExperienceReplay
     GenDGRLExperienceReplay
     MinariExperienceReplay
diff --git a/test/test_libs.py b/test/test_libs.py
index e034cea84c7..13891331b05 100644
--- a/test/test_libs.py
+++ b/test/test_libs.py
@@ -61,9 +61,11 @@
     MultiDiscreteTensorSpec,
     MultiOneHotDiscreteTensorSpec,
     OneHotDiscreteTensorSpec,
+    ReplayBufferEnsemble,
     UnboundedContinuousTensorSpec,
     UnboundedDiscreteTensorSpec,
 )
+from torchrl.data.datasets.atari_dqn import AtariDQNExperienceReplay
 from torchrl.data.datasets.d4rl import D4RLExperienceReplay
 from torchrl.data.datasets.minari_data import MinariExperienceReplay
 from torchrl.data.datasets.openml import OpenMLExperienceReplay
@@ -2489,6 +2491,59 @@ def test_load(self, image_size):
                     break
 
 
+@pytest.mark.slow
+class TestAtariDQN:
+    @pytest.fixture(scope="class")  # set once for the class, restored after its last test
+    def limit_max_runs(self):
+        prev_val = AtariDQNExperienceReplay._max_runs
+        AtariDQNExperienceReplay._max_runs = 3  # presumably caps how many runs are loaded — TODO confirm
+        yield
+        AtariDQNExperienceReplay._max_runs = prev_val
+
+    @pytest.mark.parametrize("dataset_id", ["Asterix/1", "Pong/4"])
+    @pytest.mark.parametrize(
+        "num_slices,slice_len", [[None, None], [None, 8], [2, None]]
+    )
+    def test_single_dataset(self, dataset_id, slice_len, num_slices, limit_max_runs):
+        dataset = AtariDQNExperienceReplay(
+            dataset_id, slice_len=slice_len, num_slices=num_slices
+        )
+        sample = dataset.sample(64)  # batch size is given at sampling time
+        for key in (
+            ("next", "observation"),
+            ("next", "truncated"),
+            ("next", "terminated"),
+            ("next", "done"),
+            ("next", "reward"),
+            "observation",
+            "action",
+            "done",
+            "truncated",
+            "terminated",
+        ):
+            assert key in sample.keys(True)  # True: nested ("next", ...) keys are looked up too
+        assert sample.shape == (64,)
+        assert sample.get_non_tensor("metadata")["dataset_id"] == dataset_id
+
+    @pytest.mark.parametrize(
+        "num_slices,slice_len", [[None, None], [None, 8], [2, None]]
+    )
+    def test_double_dataset(self, slice_len, num_slices, limit_max_runs):
+        dataset_pong = AtariDQNExperienceReplay(
+            "Pong/4", slice_len=slice_len, num_slices=num_slices
+        )
+        dataset_asterix = AtariDQNExperienceReplay(
+            "Asterix/1", slice_len=slice_len, num_slices=num_slices
+        )
+        dataset = ReplayBufferEnsemble(
+            dataset_pong, dataset_asterix, sample_from_all=True, batch_size=128
+        )
+        sample = dataset.sample()
+        assert sample.shape == (2, 64)  # one row per buffer; the 128-sample batch is split in two
+        assert sample[0].get_non_tensor("metadata")["dataset_id"] == "Pong/4"
+        assert sample[1].get_non_tensor("metadata")["dataset_id"] == "Asterix/1"
+
+
 @pytest.mark.slow
 class TestOpenX:
     @pytest.mark.parametrize(
diff --git a/test/test_rb.py b/test/test_rb.py
index cf9deabb956..5d184c365e2 100644
--- a/test/test_rb.py
+++ b/test/test_rb.py
@@ -1808,7 +1808,8 @@ def test_slice_sampler_errors(self):
         storage.set(range(100), data)
         sampler = SliceSampler(num_slices=num_slices)
         with pytest.raises(
-            RuntimeError, match="can only sample from TensorStorage subclasses"
+            RuntimeError,
+            match="Could not get a tensordict out of the storage, which is required for SliceSampler to compute the trajectories.",
         ):
             index, _ = sampler.sample(storage, batch_size=batch_size)
 
diff --git a/torchrl/data/datasets/__init__.py b/torchrl/data/datasets/__init__.py
index 4822ac35c54..092b80083a1 100644
--- a/torchrl/data/datasets/__init__.py
+++ b/torchrl/data/datasets/__init__.py
@@ -1,3 +1,4 @@
+from .atari_dqn import AtariDQNExperienceReplay
 from .d4rl import D4RLExperienceReplay
 from .gen_dgrl import GenDGRLExperienceReplay
 from .minari_data import MinariExperienceReplay
diff --git a/torchrl/data/datasets/atari_dqn.py b/torchrl/data/datasets/atari_dqn.py
new file mode 100644
index 00000000000..93950532026
--- /dev/null
+++ b/torchrl/data/datasets/atari_dqn.py
@@ -0,0 +1,760 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import functools
+import gzip
+import io
+import json
+import logging
+
+import os
+import shutil
+import subprocess
+import tempfile
+from collections import defaultdict
+from pathlib import Path
+
+import numpy as np
+import torch
+from tensordict import MemoryMappedTensor, TensorDict
+from torch import multiprocessing as mp
+
+from torchrl.data.replay_buffers.replay_buffers import TensorDictReplayBuffer
+from torchrl.data.replay_buffers.samplers import (
+    SamplerWithoutReplacement,
+    SliceSampler,
+    SliceSamplerWithoutReplacement,
+)
+from torchrl.data.replay_buffers.storages import Storage
+from torchrl.data.replay_buffers.writers import ImmutableDatasetWriter
+from torchrl.envs.utils import _classproperty
+
+
+class AtariDQNExperienceReplay(TensorDictReplayBuffer):
+    """Atari DQN Experience replay class.
+
+    The Atari DQN dataset (https://offline-rl.github.io/) is a collection of 5 training
+    iterations of DQN over each of the Atari 2600 games for a total of 200 million frames.
+    The sub-sampling rate (frame-skip) is equal to 4, meaning that each game dataset
+    has 50 million steps in total.
+
+    The data format follows the TED convention. Since the dataset is quite heavy,
+    the data formatting is done on-line, at sampling time.
+
+    To make training more modular, we split the dataset in each of the Atari games
+    and separate each training round. Consequently, each dataset is presented as
+    a Storage of length 50x10^6 elements. Under the hood, this dataset is split
+    in 50 memory-mapped tensordicts of length 1 million each.
+
+    Args:
+        dataset_id (str): The dataset to be downloaded.
+            Must be part of ``AtariDQNExperienceReplay.available_datasets``.
+        batch_size (int): Batch-size used during sampling.
+            Can be overridden by `data.sample(batch_size)` if necessary.
+
+    Keyword Args:
+        root (Path or str, optional): The AtariDQN dataset root directory.
+            The actual dataset memory-mapped files will be saved under
+            `<root>/<dataset_id>`. If none is provided, it defaults to
+            ``~/.cache/torchrl/atari``.
+        num_procs (int, optional): number of processes to launch for preprocessing.
+            Has no effect whenever the data is already downloaded. Defaults to 0
+            (no multiprocessing used).
+        download (bool or str, optional): Whether the dataset should be downloaded if
+            not found. Defaults to ``True``. Download can also be passed as "force",
+            in which case the downloaded data will be overwritten.
+        sampler (Sampler, optional): the sampler to be used. If none is provided
+            a default RandomSampler() will be used.
+        writer (Writer, optional): the writer to be used. If none is provided
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
+        collate_fn (callable, optional): merges a list of samples to form a
+            mini-batch of Tensor(s)/outputs. Used when using batched
+            loading from a map-style dataset.
+        pin_memory (bool): whether pin_memory() should be called on the rb
+            samples.
+        prefetch (int, optional): number of next batches to be prefetched
+            using multithreading.
+        transform (Transform, optional): Transform to be executed when sample() is called.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
+        num_slices (int, optional): the number of slices to be sampled. The batch-size
+            must be greater or equal to the ``num_slices`` argument. Exclusive
+            with ``slice_len``. Defaults to ``None`` (no slice sampling).
+            The ``sampler`` arg will override this value.
+        slice_len (int, optional): the length of the slices to be sampled. The batch-size
+            must be greater or equal to the ``slice_len`` argument and divisible
+            by it. Exclusive with ``num_slices``. Defaults to ``None`` (no slice sampling).
+            The ``sampler`` arg will override this value.
+        strict_length (bool, optional): if ``False``, trajectories of length
+            shorter than `slice_len` (or `batch_size // num_slices`) will be
+            allowed to appear in the batch.
+            Be mindful that this can result in effective `batch_size`  shorter
+            than the one asked for! Trajectories can be split using
+            :func:`torchrl.collectors.split_trajectories`. Defaults to ``True``.
+            The ``sampler`` arg will override this value.
+        replacement (bool, optional): if ``False``, sampling will occur without replacement.
+            The ``sampler`` arg will override this value.
+
+    Attributes:
+        available_datasets: list of available datasets, formatted as `<game_name>/<run>`. Example:
+            `"Pong/5"`, `"Krull/2"`, ...
+        dataset_id (str): the name of the dataset.
+        episodes (torch.Tensor): a 1d tensor indicating to what run each of the
+            1M frames belongs. To be used with :class:`~torchrl.data.replay_buffers.SliceSampler`
+            to cheaply sample slices of episodes.
+
+    Examples:
+        >>> from torchrl.data.datasets import AtariDQNExperienceReplay
+        >>> dataset = AtariDQNExperienceReplay("Pong/5", batch_size=128)
+        >>> for data in dataset:
+        ...     print(data)
+        ...     break
+        TensorDict(
+            fields={
+                action: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.int32, is_shared=False),
+                done: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False),
+                index: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.int64, is_shared=False),
+                metadata: NonTensorData(
+                    data={'invalid_range': MemoryMappedTensor([999998, 999999,      0,      1,      2]), 'add_count': MemoryMappedTensor(999999), 'dataset_id': 'Pong/5'}},
+                    batch_size=torch.Size([128]),
+                    device=None,
+                    is_shared=False),
+                next: TensorDict(
+                    fields={
+                        done: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        observation: Tensor(shape=torch.Size([128, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        reward: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.float32, is_shared=False),
+                        terminated: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        truncated: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False)},
+                    batch_size=torch.Size([128]),
+                    device=None,
+                    is_shared=False),
+                observation: Tensor(shape=torch.Size([128, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                terminated: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False),
+                truncated: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False)},
+            batch_size=torch.Size([128]),
+            device=None,
+            is_shared=False)
+
+    .. warning::
+      Atari-DQN does not provide the next observation after a termination signal.
+      In other words, there is no way to obtain the ``("next", "observation")`` state
+      when ``("next", "done")`` is ``True``. This value is filled with 0s but should
+      not be used in practice. If TorchRL's value estimators (:class:`~torchrl.objectives.values.ValueEstimator`)
+      are used, this should not be an issue.
+
+    .. note::
+      Because the construction of the sampler for episode sampling is slightly
+      convoluted, we made it easy for users to pass the arguments of the
+      :class:`~torchrl.data.replay_buffers.SliceSampler` directly to the
+      ``AtariDQNExperienceReplay`` dataset: any of the ``num_slices`` or
+      ``slice_len`` arguments will make the sampler an instance of
+      :class:`~torchrl.data.replay_buffers.SliceSampler`. The ``strict_length``
+      can also be passed.
+
+        >>> from torchrl.data.datasets import AtariDQNExperienceReplay
+        >>> from torchrl.data.replay_buffers import SliceSampler
+        >>> dataset = AtariDQNExperienceReplay("Pong/5", batch_size=128, slice_len=64)
+        >>> for data in dataset:
+        ...     print(data)
+        ...     print(data.get("index"))  # indices are in 2 groups of consecutive values
+        ...     break
+        TensorDict(
+            fields={
+                action: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.int32, is_shared=False),
+                done: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False),
+                index: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.int64, is_shared=False),
+                metadata: NonTensorData(
+                    data={'invalid_range': MemoryMappedTensor([999998, 999999,      0,      1,      2]), 'add_count': MemoryMappedTensor(999999), 'dataset_id': 'Pong/5'},
+                    batch_size=torch.Size([128]),
+                    device=None,
+                    is_shared=False),
+                next: TensorDict(
+                    fields={
+                        done: Tensor(shape=torch.Size([128, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        observation: Tensor(shape=torch.Size([128, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        reward: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.float32, is_shared=False),
+                        terminated: Tensor(shape=torch.Size([128, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        truncated: Tensor(shape=torch.Size([128, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                    batch_size=torch.Size([128]),
+                    device=None,
+                    is_shared=False),
+                observation: Tensor(shape=torch.Size([128, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                terminated: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False),
+                truncated: Tensor(shape=torch.Size([128]), device=cpu, dtype=torch.uint8, is_shared=False)},
+            batch_size=torch.Size([128]),
+            device=None,
+            is_shared=False)
+        tensor([2657628, 2657629, 2657630, 2657631, 2657632, 2657633, 2657634, 2657635,
+                2657636, 2657637, 2657638, 2657639, 2657640, 2657641, 2657642, 2657643,
+                2657644, 2657645, 2657646, 2657647, 2657648, 2657649, 2657650, 2657651,
+                2657652, 2657653, 2657654, 2657655, 2657656, 2657657, 2657658, 2657659,
+                2657660, 2657661, 2657662, 2657663, 2657664, 2657665, 2657666, 2657667,
+                2657668, 2657669, 2657670, 2657671, 2657672, 2657673, 2657674, 2657675,
+                2657676, 2657677, 2657678, 2657679, 2657680, 2657681, 2657682, 2657683,
+                2657684, 2657685, 2657686, 2657687, 2657688, 2657689, 2657690, 2657691,
+                1995687, 1995688, 1995689, 1995690, 1995691, 1995692, 1995693, 1995694,
+                1995695, 1995696, 1995697, 1995698, 1995699, 1995700, 1995701, 1995702,
+                1995703, 1995704, 1995705, 1995706, 1995707, 1995708, 1995709, 1995710,
+                1995711, 1995712, 1995713, 1995714, 1995715, 1995716, 1995717, 1995718,
+                1995719, 1995720, 1995721, 1995722, 1995723, 1995724, 1995725, 1995726,
+                1995727, 1995728, 1995729, 1995730, 1995731, 1995732, 1995733, 1995734,
+                1995735, 1995736, 1995737, 1995738, 1995739, 1995740, 1995741, 1995742,
+                1995743, 1995744, 1995745, 1995746, 1995747, 1995748, 1995749, 1995750])
+
+    .. note::
+      As always, datasets should be composed using :class:`~torchrl.data.replay_buffers.ReplayBufferEnsemble`:
+
+        >>> from torchrl.data.datasets import AtariDQNExperienceReplay
+        >>> from torchrl.data.replay_buffers import ReplayBufferEnsemble
+        >>> # we change this parameter for quick experimentation, in practice it should be left untouched
+        >>> AtariDQNExperienceReplay._max_runs = 2
+        >>> dataset_asterix = AtariDQNExperienceReplay("Asterix/5", batch_size=128, slice_len=64, num_procs=4)
+        >>> dataset_pong = AtariDQNExperienceReplay("Pong/5", batch_size=128, slice_len=64, num_procs=4)
+        >>> dataset = ReplayBufferEnsemble(dataset_pong, dataset_asterix, batch_size=128, sample_from_all=True)
+        >>> sample = dataset.sample()
+        >>> print("first sample, Pong", sample[0])
+        first sample, Pong TensorDict(
+            fields={
+                action: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.int32, is_shared=False),
+                done: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.uint8, is_shared=False),
+                index: TensorDict(
+                    fields={
+                        buffer_ids: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.int64, is_shared=False),
+                        index: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.int64, is_shared=False)},
+                    batch_size=torch.Size([64]),
+                    device=None,
+                    is_shared=False),
+                metadata: NonTensorData(
+                    data={'invalid_range': MemoryMappedTensor([999998, 999999,      0,      1,      2]), 'add_count': MemoryMappedTensor(999999), 'dataset_id': 'Pong/5'},
+                    batch_size=torch.Size([64]),
+                    device=None,
+                    is_shared=False),
+                next: TensorDict(
+                    fields={
+                        done: Tensor(shape=torch.Size([64, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        observation: Tensor(shape=torch.Size([64, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        reward: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.float32, is_shared=False),
+                        terminated: Tensor(shape=torch.Size([64, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        truncated: Tensor(shape=torch.Size([64, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                    batch_size=torch.Size([64]),
+                    device=None,
+                    is_shared=False),
+                observation: Tensor(shape=torch.Size([64, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                terminated: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.uint8, is_shared=False),
+                truncated: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.uint8, is_shared=False)},
+            batch_size=torch.Size([64]),
+            device=None,
+            is_shared=False)
+        >>> print("second sample, Asterix", sample[1])
+        second sample, Asterix TensorDict(
+            fields={
+                action: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.int32, is_shared=False),
+                done: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.uint8, is_shared=False),
+                index: TensorDict(
+                    fields={
+                        buffer_ids: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.int64, is_shared=False),
+                        index: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.int64, is_shared=False)},
+                    batch_size=torch.Size([64]),
+                    device=None,
+                    is_shared=False),
+                metadata: NonTensorData(
+                    data={'invalid_range': MemoryMappedTensor([999998, 999999,      0,      1,      2]), 'add_count': MemoryMappedTensor(999999), 'dataset_id': 'Asterix/5'},
+                    batch_size=torch.Size([64]),
+                    device=None,
+                    is_shared=False),
+                next: TensorDict(
+                    fields={
+                        done: Tensor(shape=torch.Size([64, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        observation: Tensor(shape=torch.Size([64, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        reward: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.float32, is_shared=False),
+                        terminated: Tensor(shape=torch.Size([64, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        truncated: Tensor(shape=torch.Size([64, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                    batch_size=torch.Size([64]),
+                    device=None,
+                    is_shared=False),
+                observation: Tensor(shape=torch.Size([64, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                terminated: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.uint8, is_shared=False),
+                truncated: Tensor(shape=torch.Size([64]), device=cpu, dtype=torch.uint8, is_shared=False)},
+            batch_size=torch.Size([64]),
+            device=None,
+            is_shared=False)
+        >>> print("Aggregate (metadata hidden)", sample)
+        Aggregate (metadata hidden) LazyStackedTensorDict(
+            fields={
+                action: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.int32, is_shared=False),
+                done: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.uint8, is_shared=False),
+                index: LazyStackedTensorDict(
+                    fields={
+                        buffer_ids: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.int64, is_shared=False),
+                        index: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.int64, is_shared=False)},
+                    exclusive_fields={
+                    },
+                    batch_size=torch.Size([2, 64]),
+                    device=None,
+                    is_shared=False,
+                    stack_dim=0),
+                metadata: LazyStackedTensorDict(
+                    fields={
+                    },
+                    exclusive_fields={
+                    },
+                    batch_size=torch.Size([2, 64]),
+                    device=None,
+                    is_shared=False,
+                    stack_dim=0),
+                next: LazyStackedTensorDict(
+                    fields={
+                        done: Tensor(shape=torch.Size([2, 64, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        observation: Tensor(shape=torch.Size([2, 64, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                        reward: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.float32, is_shared=False),
+                        terminated: Tensor(shape=torch.Size([2, 64, 1]), device=cpu, dtype=torch.bool, is_shared=False),
+                        truncated: Tensor(shape=torch.Size([2, 64, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
+                    exclusive_fields={
+                    },
+                    batch_size=torch.Size([2, 64]),
+                    device=None,
+                    is_shared=False,
+                    stack_dim=0),
+                observation: Tensor(shape=torch.Size([2, 64, 84, 84]), device=cpu, dtype=torch.uint8, is_shared=False),
+                terminated: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.uint8, is_shared=False),
+                truncated: Tensor(shape=torch.Size([2, 64]), device=cpu, dtype=torch.uint8, is_shared=False)},
+            exclusive_fields={
+            },
+            batch_size=torch.Size([2, 64]),
+            device=None,
+            is_shared=False,
+            stack_dim=0)
+
+    """
+
+    @_classproperty
+    def available_datasets(cls):
+        """List every accepted ``dataset_id``, formatted as ``<game_name>/<run>``.
+
+        Each game name below is combined with the run numbers 1 to 5, games
+        first and runs second, so the five runs of a game are contiguous.
+        """
+        game_names = (
+            "AirRaid",
+            "Alien",
+            "Amidar",
+            "Assault",
+            "Asterix",
+            "Asteroids",
+            "Atlantis",
+            "BankHeist",
+            "BattleZone",
+            "BeamRider",
+            "Berzerk",
+            "Bowling",
+            "Boxing",
+            "Breakout",
+            "Carnival",
+            "Centipede",
+            "ChopperCommand",
+            "CrazyClimber",
+            "DemonAttack",
+            "DoubleDunk",
+            "ElevatorAction",
+            "Enduro",
+            "FishingDerby",
+            "Freeway",
+            "Frostbite",
+            "Gopher",
+            "Gravitar",
+            "Hero",
+            "IceHockey",
+            "Jamesbond",
+            "JourneyEscape",
+            "Kangaroo",
+            "Krull",
+            "KungFuMaster",
+            "MontezumaRevenge",
+            "MsPacman",
+            "NameThisGame",
+            "Phoenix",
+            "Pitfall",
+            "Pong",
+            "Pooyan",
+            "PrivateEye",
+            "Qbert",
+            "Riverraid",
+            "RoadRunner",
+            "Robotank",
+            "Seaquest",
+            "Skiing",
+            "Solaris",
+            "SpaceInvaders",
+        )
+        run_ids = range(1, 6)
+        return [f"{game}/{run_id}" for game in game_names for run_id in run_ids]
+
+    # If we want to keep track of the original atari files: when set, this
+    # directory is used for the downloads instead of a throw-away temporary one
+    tmpdir = None
+    # use _max_runs for debugging, avoids downloading the entire dataset
+    # (runs whose number is >= _max_runs are skipped; None processes every run)
+    _max_runs = None
+
+    def __init__(
+        self,
+        dataset_id: str,
+        batch_size: int | None = None,
+        *,
+        root: str | Path | None = None,
+        download: bool | str = True,
+        sampler=None,
+        writer=None,
+        transform: "Transform" | None = None,  # noqa: F821
+        num_procs: int = 0,
+        num_slices: int | None = None,
+        slice_len: int | None = None,
+        strict_len: bool = True,
+        replacement: bool = True,
+        **kwargs,
+    ):
+        # Validate the id first: it is later interpolated in paths and commands.
+        if dataset_id not in self.available_datasets:
+            raise ValueError(
+                "The dataset_id is not part of the available datasets. The dataset should be named <game_name>/<run> "
+                "where <game_name> is one of the Atari 2600 games and the run is a number between 1 and 5. "
+                "The full list of accepted dataset_ids is available under AtariDQNExperienceReplay.available_datasets."
+            )
+        self.dataset_id = dataset_id
+        from torchrl.data.datasets.utils import _get_root_dir
+
+        if root is None:
+            root = _get_root_dir("atari")
+        self.root = root
+        self.num_procs = num_procs
+        # ``download="force"`` always re-downloads; a truthy value only
+        # downloads when no matching ``processed.json`` marker is found.
+        if download == "force" or (download and not self._is_downloaded):
+            try:
+                self._download_and_preproc()
+            except Exception:
+                # remove temporary data so that a partial dataset is never reused
+                if os.path.exists(self.dataset_path):
+                    shutil.rmtree(self.dataset_path)
+                raise
+        storage = _AtariStorage(self.dataset_path)
+        if writer is None:
+            writer = ImmutableDatasetWriter()
+        if sampler is None:
+            # Any slice-related argument turns the sampler into a slice sampler.
+            if num_slices is not None or slice_len is not None:
+                if replacement:
+                    sampler = SliceSampler(
+                        num_slices=num_slices,
+                        slice_len=slice_len,
+                        trajectories=storage.episodes,
+                        cache_values=True,
+                        # forward the user's choice instead of dropping it
+                        strict_length=strict_len,
+                    )
+                else:
+                    sampler = SliceSamplerWithoutReplacement(
+                        num_slices=num_slices,
+                        slice_len=slice_len,
+                        trajectories=storage.episodes,
+                        strict_length=strict_len,
+                    )
+            elif not replacement:
+                sampler = SamplerWithoutReplacement()
+
+        super().__init__(
+            storage=storage,
+            batch_size=batch_size,
+            writer=writer,
+            sampler=sampler,
+            # the storage already returns collated batches
+            collate_fn=lambda x: x,
+            transform=transform,
+            **kwargs,
+        )
+
+    @property
+    def episodes(self):
+        """Return the ``episodes`` attribute of the underlying storage."""
+        # presumably the ``_AtariStorage`` built in ``__init__`` - confirm in the parent class
+        return self._storage.episodes
+
+    @property
+    def root(self) -> Path:
+        """Root directory under which the dataset is stored."""
+        return self._root
+
+    @root.setter
+    def root(self, value):
+        # Normalise to ``Path`` so that ``dataset_path`` can use the ``/`` operator.
+        self._root = Path(value)
+
+    @property
+    def dataset_path(self) -> Path:
+        """Directory of this dataset: ``<root>/<dataset_id>``."""
+        return self._root / self.dataset_id
+
+    @property
+    def _is_downloaded(self):
+        """Whether a ``processed.json`` marker matching ``_max_runs`` exists.
+
+        The marker stores the ``_max_runs`` value used when the data was
+        processed; a different current value makes this return ``False``.
+        """
+        marker = self.dataset_path / "processed.json"
+        if not os.path.exists(marker):
+            return False
+        with open(marker, "r") as jsonfile:
+            content = json.load(jsonfile)
+        return content.get("processed", False) == self._max_runs
+
+    def _download_and_preproc(self):
+        """Download every run of ``self.dataset_id`` with ``gsutil`` and preprocess it.
+
+        Any existing data under ``self.dataset_path`` is removed first. Runs are
+        handled sequentially when ``self.num_procs`` is 0 and through a
+        multiprocessing pool otherwise. A ``processed.json`` marker holding
+        ``self._max_runs`` is written at the end.
+
+        Raises:
+            RuntimeError: if ``gsutil`` lists no ``.gz`` file for the dataset.
+        """
+        logging.info(
+            f"Downloading and preprocessing dataset {self.dataset_id} with {self.num_procs} processes. This may take a while..."
+        )
+        if os.path.exists(self.dataset_path):
+            shutil.rmtree(self.dataset_path)
+        with tempfile.TemporaryDirectory() as tempdir:
+            if self.tmpdir is not None:
+                tempdir = self.tmpdir
+                # a user-provided directory may not exist yet: create it before
+                # listing it, otherwise ``os.listdir`` raises FileNotFoundError
+                os.makedirs(tempdir, exist_ok=True)
+            # NOTE(review): when ``tmpdir`` is not empty nothing is downloaded or
+            # processed below - confirm that this is the intended behaviour.
+            if not os.listdir(tempdir):
+                # get the list of runs
+                command = f"gsutil -m ls -R gs://atari-replay-datasets/dqn/{self.dataset_id}/replay_logs"
+                output = subprocess.run(command, shell=True, capture_output=True)
+                # file names contain ``$`` which must be escaped for the shell
+                # command that later copies them
+                files = [
+                    file.decode("utf-8").replace("$", "\\$")
+                    for file in output.stdout.splitlines()
+                    if file.endswith(b".gz")
+                ]
+                if not files:
+                    raise RuntimeError(
+                        f"No .gz file could be listed for dataset {self.dataset_id} "
+                        f"(gsutil exit code {output.returncode}): "
+                        f"{output.stderr.decode('utf-8', errors='replace')}"
+                    )
+                self.remote_gz_files = self._list_runs(None, files)
+                # runs are sorted: the last key is the highest run number
+                total_runs = list(self.remote_gz_files)[-1]
+                if self.num_procs == 0:
+                    for run, run_files in self.remote_gz_files.items():
+                        self._download_and_proc_split(
+                            run,
+                            run_files,
+                            tempdir=tempdir,
+                            dataset_path=self.dataset_path,
+                            total_episodes=total_runs,
+                            max_runs=self._max_runs,
+                        )
+                else:
+                    func = functools.partial(
+                        self._download_and_proc_split,
+                        tempdir=tempdir,
+                        dataset_path=self.dataset_path,
+                        total_episodes=total_runs,
+                        max_runs=self._max_runs,
+                    )
+                    args = list(self.remote_gz_files.items())
+                    with mp.Pool(self.num_procs) as pool:
+                        pool.starmap(func, args)
+        with open(self.dataset_path / "processed.json", "w") as file:
+            # we save self._max_runs such that changing the number of runs to process
+            # forces the data to be re-downloaded
+            json.dump({"processed": self._max_runs}, file)
+
+    @classmethod
+    def _download_and_proc_split(
+        cls, run, run_files, *, tempdir, dataset_path, total_episodes, max_runs
+    ):
+        """Download the files of one run, preprocess them and delete the download.
+
+        Args:
+            run: number of the run to process.
+            run_files: remote ``.gz`` files of that run (already shell-escaped).
+            tempdir: directory under which ``<run>/`` is created for the download.
+            dataset_path: destination directory; the run is written to
+                ``dataset_path/<run>``.
+            total_episodes: only used in the final log message.
+            max_runs: if not ``None``, runs with ``run >= max_runs`` are skipped.
+        """
+        if (max_runs is not None) and (run >= max_runs):
+            return
+        tempdir = Path(tempdir)
+        run_dir = tempdir / str(run)
+        os.makedirs(run_dir)
+        files_str = " ".join(run_files)
+        # a %s placeholder is required, otherwise the record fails to format
+        logging.info("downloading %s", files_str)
+        command = f"gsutil -m cp {files_str} {tempdir}/{run}"
+        subprocess.run(command, shell=True)
+        local_gz_files = cls._list_runs(run_dir)
+        # we iterate over the dict but this one has length 1
+        for local_run in local_gz_files:
+            path = dataset_path / str(local_run)
+            try:
+                cls._preproc_run(path, local_gz_files, local_run)
+            except Exception:
+                # best-effort cleanup: never let it mask the original error
+                shutil.rmtree(path, ignore_errors=True)
+                raise
+        shutil.rmtree(run_dir)
+        logging.info(f"Concluded run {run} out of {total_episodes}")
+
+    @classmethod
+    def _preproc_run(cls, path, gz_files, run):
+        """Convert the gzipped arrays of one run into memory-mapped tensors.
+
+        Each file listed in ``gz_files[run]`` is decompressed, loaded with
+        ``np.load`` and written as a ``.memmap`` file under ``path``. The
+        resulting tensordict is finally memory-mapped in ``path``.
+
+        Args:
+            path: destination directory of the run.
+            gz_files: mapping from run number to the list of local ``.gz`` files.
+            run: key of ``gz_files`` to process.
+        """
+        files = gz_files[run]
+        td = TensorDict({}, [])
+        path = Path(path)
+        for file in files:
+            # file stem (text before the first dot) identifies the stored entry
+            name = str(Path(file).parts[-1]).split(".")[0]
+            with gzip.GzipFile(file, mode="rb") as f:
+                file_content = f.read()
+                file_content = io.BytesIO(file_content)
+                file_content = np.load(file_content)
+                t = torch.as_tensor(file_content)
+            # Create the memmap file
+            key = cls._process_name(name)
+            if key == ("data", "observation"):
+                # allocate one extra leading entry: the last row is left unfilled
+                # so that ``observation[i + 1]`` exists for every stored frame
+                shape = t.shape
+                shape = [shape[0] + 1] + list(shape[1:])
+                filename = path / "data" / "observation.memmap"
+                os.makedirs(filename.parent, exist_ok=True)
+                mmap = MemoryMappedTensor.empty(shape, dtype=t.dtype, filename=filename)
+                mmap[:-1].copy_(t)
+                td[key] = mmap
+                # td["data", "next", key[1:]] = mmap[1:]
+            else:
+                if key in (
+                    ("data", "reward"),
+                    ("data", "done"),
+                    ("data", "terminated"),
+                ):
+                    # these entries are stored under the "next" sub-tensordict
+                    filename = path / "data" / "next" / (key[-1] + ".memmap")
+                    os.makedirs(filename.parent, exist_ok=True)
+                    mmap = MemoryMappedTensor.from_tensor(t, filename=filename)
+                    td["data", "next", key[1:]] = mmap
+                else:
+                    # build ``path/<key[0]>/.../<key[-1]>.memmap``
+                    filename = path
+                    for i, _key in enumerate(key):
+                        if i == len(key) - 1:
+                            _key = _key + ".memmap"
+                        filename = filename / _key
+                    os.makedirs(filename.parent, exist_ok=True)
+                    mmap = MemoryMappedTensor.from_tensor(t, filename=filename)
+                    td[key] = mmap
+        # the two path components preceding the run folder form the dataset id
+        td.set_non_tensor("dataset_id", "/".join(path.parts[-3:-1]))
+        td.memmap_(path, copy_existing=False)
+
+    @staticmethod
+    def _process_name(name):
+        """Map the stem of a replay-log file to the key it is stored under.
+
+        A trailing ``"_ckpt"`` is dropped. Names containing ``"store"`` map to
+        ``("data", <second "_"-separated token>)``, any other name maps to
+        ``(<name>,)``. A final ``"terminal"`` entry is renamed ``"terminated"``.
+        """
+        stem = name[:-5] if name.endswith("_ckpt") else name
+        in_store = "store" in stem
+        leaf = stem.split("_")[1] if in_store else stem
+        if leaf == "terminal":
+            leaf = "terminated"
+        prefix = ("data",) if in_store else ()
+        return (*prefix, leaf)
+
+    @classmethod
+    def _list_runs(cls, download_path, gz_files=None):
+        """Group ``.gz`` files by run number.
+
+        Args:
+            download_path: directory walked recursively for ``.gz`` files when
+                ``gz_files`` is ``None``; ignored otherwise.
+            gz_files: optional explicit list of file paths. Every file name must
+                have the form ``<name>.<run>.<extension>``.
+
+        Returns:
+            A dict mapping each integer run number to its files, with keys in
+            increasing order.
+        """
+        if gz_files is None:
+            gz_files = [
+                os.path.join(dirpath, filename)
+                for dirpath, _, filenames in os.walk(download_path)
+                for filename in filenames
+                if filename.endswith(".gz")
+            ]
+        by_run = defaultdict(list)
+        for gz_file in gz_files:
+            # exactly two dots are expected, anything else raises ValueError
+            _, run, _ = str(Path(gz_file).parts[-1]).split(".")
+            by_run[int(run)].append(gz_file)
+        return {run: by_run[run] for run in sorted(by_run)}
+
+
+class _AtariStorage(Storage):
+    """Read-only storage over the memory-mapped runs of an Atari-DQN dataset.
+
+    Every sub-folder of ``path`` is expected to be named after an integer run
+    number and to hold a tensordict written with ``memmap_``. The runs are
+    exposed as one flat sequence of frames, ordered by run number.
+
+    Attributes:
+        splits: sorted run numbers found under ``path``.
+        frames_per_split: tensor with one row ``[run, cumulative_frames]`` per
+            run, preceded by a ``[-1, 0]`` row so that consecutive rows bound
+            the global indices of each run.
+        episodes: cumulative sum of the ``("data", "next", "terminated")``
+            entries of all the runs.
+    """
+
+    def __init__(self, path):
+        self.path = Path(path)
+
+        # every directory directly under ``path`` is one run
+        self.splits = sorted(
+            int(name)
+            for name in os.listdir(path)
+            if os.path.isdir(os.path.join(path, name))
+        )
+        self._split_tds = []
+        frames_per_split = {}
+        for split in self.splits:
+            split_td = self._load_split(self.path / str(split))
+            self._split_tds.append(split_td)
+            # take away 1 because we padded with 1 empty val
+            frames_per_split[split] = split_td.get(("data", "observation")).shape[0] - 1
+
+        frames_per_split = torch.tensor(
+            [[split, length] for (split, length) in frames_per_split.items()]
+        )
+        frames_per_split[:, 1] = frames_per_split[:, 1].cumsum(0)
+        self.frames_per_split = torch.cat(
+            [torch.tensor([[-1, 0]]), frames_per_split], 0
+        )
+
+        # retrieve episodes
+        self.episodes = torch.cumsum(
+            torch.cat(
+                [td.get(("data", "next", "terminated")) for td in self._split_tds], 0
+            ),
+            0,
+        )
+
+    def __len__(self):
+        # the last cumulative count is the total number of frames
+        return self.frames_per_split[-1, 1].item()
+
+    def _read_from_splits(self, item: int | torch.Tensor):
+        """Gather the frames at the global index (or indices) ``item``.
+
+        Args:
+            item: an int, a 0-d tensor or a 1-d tensor of global frame indices.
+
+        Returns:
+            The requested frames. Frames are grouped by run, in increasing run
+            order. For a scalar ``item`` the single frame is returned without a
+            leading batch dimension.
+
+        Raises:
+            RuntimeError: if an index is negative or not smaller than ``len(self)``.
+        """
+        # We need to allocate each item to its storage.
+        # We don't assume each storage has the same size (too expensive to test)
+        # so we keep a map of each storage cumulative length and retrieve the
+        # storages one after the other.
+        is_scalar = not isinstance(item, torch.Tensor) or item.ndim == 0
+        # always work on a 1-d tensor: the mask below is then always 2-d, even
+        # with a single run or a single index
+        index = torch.as_tensor(item).reshape(-1)
+        lower = self.frames_per_split[:-1, 1].unsqueeze(1)
+        upper = self.frames_per_split[1:, 1].unsqueeze(1)
+        # mask[p, i] is True when index[i] belongs to the p-th run
+        mask = (index >= lower) & (index < upper)
+        split_pos, item_pos = mask.nonzero().unbind(-1)
+        if split_pos.numel() != index.numel():
+            raise RuntimeError("Index out of bounds.")
+        # ``nonzero`` enumerates the mask row by row: scatter the run positions
+        # back to the order of ``index``
+        position = torch.empty_like(split_pos)
+        position[item_pos] = split_pos
+        # row p of ``frames_per_split`` holds the first global index of run p
+        local_index = index - self.frames_per_split[position, 1]
+        unique_positions, inverse = torch.unique(position, return_inverse=True)
+        out = [
+            self._proc_td(self._split_tds[pos], local_index[inverse == i])
+            for i, pos in enumerate(unique_positions.tolist())
+        ]
+        result = torch.cat(out, 0)
+        return result[0] if is_scalar else result
+
+    def _load_split(self, path):
+        """Load the memory-mapped tensordict of one run."""
+        return TensorDict.load_memmap(path)
+
+    def _proc_td(self, td, index):
+        """Build the batch of transitions of run ``td`` at the local ``index``.
+
+        Args:
+            td: tensordict of one run, as returned by :meth:`_load_split`.
+            index: 1-d tensor of frame indices local to that run.
+        """
+        td_data = td.get("data")
+        # the next observation is the following entry of the padded buffer
+        obs_ = td_data.get("observation")[index + 1]
+        done = td_data.get(("next", "terminated"))[index].bool().reshape(-1)
+        if done.any():
+            # no next observation exists after a termination: fill with 0s
+            obs_ = torch.index_fill(obs_, 0, done.nonzero().squeeze(-1), 0)
+        td_idx = td.empty()
+        td_idx.set(("next", "observation"), obs_)
+        # everything stored outside of "data" is returned as metadata
+        non_tensor = td.exclude("data").to_dict()
+        td_idx.update(td_data.apply(lambda x: x[index]))
+        if isinstance(index, torch.Tensor):
+            td_idx.batch_size = [len(index)]
+        td_idx.set_non_tensor("metadata", non_tensor)
+
+        # "done" mirrors "terminated" for the next step; all other flags are 0
+        terminated = td_idx.get(("next", "terminated"))
+        zterminated = torch.zeros_like(terminated)
+        td_idx.set(("next", "done"), terminated.clone())
+        td_idx.set(("next", "truncated"), zterminated)
+        td_idx.set("terminated", zterminated)
+        td_idx.set("done", zterminated)
+        td_idx.set("truncated", zterminated)
+
+        return td_idx
+
+    def get(self, index):
+        """Return the frames selected by ``index``.
+
+        ``index`` can be an int, a tensor with at most one dimension, a range,
+        a list, a slice or a tuple whose first element is one of those (the
+        remaining elements index the result).
+
+        Raises:
+            RuntimeError: if ``index`` is a tensor with more than one dimension.
+        """
+        if isinstance(index, int):
+            return self._read_from_splits(index)
+        if isinstance(index, tuple):
+            if len(index) == 1:
+                return self.get(index[0])
+            return self.get(index[0])[(Ellipsis, *index[1:])]
+        if isinstance(index, torch.Tensor):
+            if index.ndim <= 1:
+                return self._read_from_splits(index)
+            raise RuntimeError("Only 1d tensors are accepted")
+        if isinstance(index, (range, list)):
+            return self[torch.tensor(index)]
+        if isinstance(index, slice):
+            # ``slice.indices`` resolves defaults, negative and out-of-range bounds
+            return self.get(torch.arange(*index.indices(len(self))))
+        return self[torch.arange(len(self))[index]]
diff --git a/torchrl/data/datasets/d4rl.py b/torchrl/data/datasets/d4rl.py
index c2646f8366b..2d91da82367 100644
--- a/torchrl/data/datasets/d4rl.py
+++ b/torchrl/data/datasets/d4rl.py
@@ -52,7 +52,7 @@ class D4RLExperienceReplay(TensorDictReplayBuffer):
         sampler (Sampler, optional): the sampler to be used. If none is provided
             a default RandomSampler() will be used.
         writer (Writer, optional): the writer to be used. If none is provided
-            a default RoundRobinWriter() will be used.
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
         collate_fn (callable, optional): merges a list of samples to form a
             mini-batch of Tensor(s)/outputs.  Used when using batched
             loading from a map-style dataset.
@@ -61,7 +61,7 @@ class D4RLExperienceReplay(TensorDictReplayBuffer):
         prefetch (int, optional): number of next batches to be prefetched
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
-            To chain transforms use the :obj:`Compose` class.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
         split_trajs (bool, optional): if ``True``, the trajectories will be split
             along the first dimension and padded to have a matching shape.
             To split the trajectories, the ``"done"`` signal will be used, which
diff --git a/torchrl/data/datasets/minari_data.py b/torchrl/data/datasets/minari_data.py
index 8e20ebc12da..866888ae925 100644
--- a/torchrl/data/datasets/minari_data.py
+++ b/torchrl/data/datasets/minari_data.py
@@ -75,7 +75,7 @@ class MinariExperienceReplay(TensorDictReplayBuffer):
         sampler (Sampler, optional): the sampler to be used. If none is provided
             a default RandomSampler() will be used.
         writer (Writer, optional): the writer to be used. If none is provided
-            a default RoundRobinWriter() will be used.
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
         collate_fn (callable, optional): merges a list of samples to form a
             mini-batch of Tensor(s)/outputs.  Used when using batched
             loading from a map-style dataset.
@@ -84,15 +84,13 @@ class MinariExperienceReplay(TensorDictReplayBuffer):
         prefetch (int, optional): number of next batches to be prefetched
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
-            To chain transforms use the :obj:`Compose` class.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
         split_trajs (bool, optional): if ``True``, the trajectories will be split
             along the first dimension and padded to have a matching shape.
             To split the trajectories, the ``"done"`` signal will be used, which
             is recovered via ``done = truncated | terminated``. In other words,
             it is assumed that any ``truncated`` or ``terminated`` signal is
-            equivalent to the end of a trajectory. For some datasets from
-            ``D4RL``, this may not be true. It is up to the user to make
-            accurate choices regarding this usage of ``split_trajs``.
+            equivalent to the end of a trajectory.
             Defaults to ``False``.
 
     Attributes:
diff --git a/torchrl/data/datasets/openml.py b/torchrl/data/datasets/openml.py
index 07e5dfc8cdc..fadcc0e7f96 100644
--- a/torchrl/data/datasets/openml.py
+++ b/torchrl/data/datasets/openml.py
@@ -42,7 +42,7 @@ class OpenMLExperienceReplay(TensorDictReplayBuffer):
         sampler (Sampler, optional): the sampler to be used. If none is provided
             a default RandomSampler() will be used.
         writer (Writer, optional): the writer to be used. If none is provided
-            a default RoundRobinWriter() will be used.
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
         collate_fn (callable, optional): merges a list of samples to form a
             mini-batch of Tensor(s)/outputs.  Used when using batched
             loading from a map-style dataset.
@@ -51,7 +51,7 @@ class OpenMLExperienceReplay(TensorDictReplayBuffer):
         prefetch (int, optional): number of next batches to be prefetched
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
-            To chain transforms use the :obj:`Compose` class.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
 
     """
 
diff --git a/torchrl/data/datasets/openx.py b/torchrl/data/datasets/openx.py
index 5237386c200..4beb18b00a1 100644
--- a/torchrl/data/datasets/openx.py
+++ b/torchrl/data/datasets/openx.py
@@ -114,10 +114,10 @@ class for more information on how to interact with non-tensor data
             0s. If another value is provided, it will be used for padding. If
             ``False`` or ``None`` (default) any encounter with a trajectory of
             insufficient length will raise an exception.
-        root (Path or str, optional): The Minari dataset root directory.
+        root (Path or str, optional): The OpenX dataset root directory.
             The actual dataset memory-mapped files will be saved under
             `<root>/<dataset_id>`. If none is provided, it defaults to
-            ``~/.cache/torchrl/minari`.
+            ``~/.cache/torchrl/openx``.
         streaming (bool, optional): if ``True``, the data won't be downloaded but
             read from a stream instead.
 
@@ -139,7 +139,7 @@ class for more information on how to interact with non-tensor data
         sampler (Sampler, optional): the sampler to be used. If none is provided
             a default RandomSampler() will be used.
         writer (Writer, optional): the writer to be used. If none is provided
-            a default RoundRobinWriter() will be used.
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
         collate_fn (callable, optional): merges a list of samples to form a
             mini-batch of Tensor(s)/outputs.  Used when using batched
             loading from a map-style dataset.
@@ -148,15 +148,13 @@ class for more information on how to interact with non-tensor data
         prefetch (int, optional): number of next batches to be prefetched
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
-            To chain transforms use the :obj:`Compose` class.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
         split_trajs (bool, optional): if ``True``, the trajectories will be split
             along the first dimension and padded to have a matching shape.
             To split the trajectories, the ``"done"`` signal will be used, which
             is recovered via ``done = truncated | terminated``. In other words,
             it is assumed that any ``truncated`` or ``terminated`` signal is
-            equivalent to the end of a trajectory. For some datasets from
-            ``D4RL``, this may not be true. It is up to the user to make
-            accurate choices regarding this usage of ``split_trajs``.
+            equivalent to the end of a trajectory.
             Defaults to ``False``.
         strict_length (bool, optional): if ``False``, trajectories of length
             shorter than `slice_len` (or `batch_size // num_slices`) will be
diff --git a/torchrl/data/datasets/roboset.py b/torchrl/data/datasets/roboset.py
index bcbb12a4891..825b937e8ac 100644
--- a/torchrl/data/datasets/roboset.py
+++ b/torchrl/data/datasets/roboset.py
@@ -59,7 +59,7 @@ class RobosetExperienceReplay(TensorDictReplayBuffer):
         sampler (Sampler, optional): the sampler to be used. If none is provided
             a default RandomSampler() will be used.
         writer (Writer, optional): the writer to be used. If none is provided
-            a default RoundRobinWriter() will be used.
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
         collate_fn (callable, optional): merges a list of samples to form a
             mini-batch of Tensor(s)/outputs.  Used when using batched
             loading from a map-style dataset.
@@ -68,15 +68,13 @@ class RobosetExperienceReplay(TensorDictReplayBuffer):
         prefetch (int, optional): number of next batches to be prefetched
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
-            To chain transforms use the :obj:`Compose` class.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
         split_trajs (bool, optional): if ``True``, the trajectories will be split
             along the first dimension and padded to have a matching shape.
             To split the trajectories, the ``"done"`` signal will be used, which
             is recovered via ``done = truncated | terminated``. In other words,
             it is assumed that any ``truncated`` or ``terminated`` signal is
-            equivalent to the end of a trajectory. For some datasets from
-            ``D4RL``, this may not be true. It is up to the user to make
-            accurate choices regarding this usage of ``split_trajs``.
+            equivalent to the end of a trajectory.
             Defaults to ``False``.
 
     Attributes:
diff --git a/torchrl/data/datasets/vd4rl.py b/torchrl/data/datasets/vd4rl.py
index a6e79f9b266..417c025ae59 100644
--- a/torchrl/data/datasets/vd4rl.py
+++ b/torchrl/data/datasets/vd4rl.py
@@ -67,7 +67,7 @@ class VD4RLExperienceReplay(TensorDictReplayBuffer):
         sampler (Sampler, optional): the sampler to be used. If none is provided
             a default RandomSampler() will be used.
         writer (Writer, optional): the writer to be used. If none is provided
-            a default RoundRobinWriter() will be used.
+            a default :class:`~torchrl.data.replay_buffers.writers.ImmutableDatasetWriter` will be used.
         collate_fn (callable, optional): merges a list of samples to form a
             mini-batch of Tensor(s)/outputs.  Used when using batched
             loading from a map-style dataset.
@@ -76,7 +76,7 @@ class VD4RLExperienceReplay(TensorDictReplayBuffer):
         prefetch (int, optional): number of next batches to be prefetched
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
-            To chain transforms use the :obj:`Compose` class.
+            To chain transforms use the :class:`~torchrl.envs.transforms.transforms.Compose` class.
         split_trajs (bool, optional): if ``True``, the trajectories will be split
             along the first dimension and padded to have a matching shape.
             To split the trajectories, the ``"done"`` signal will be used, which
diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py
index de9b13b8129..1e1ce31bf96 100644
--- a/torchrl/data/replay_buffers/replay_buffers.py
+++ b/torchrl/data/replay_buffers/replay_buffers.py
@@ -157,13 +157,7 @@ def __init__(
         self._writer = writer if writer is not None else RoundRobinWriter()
         self._writer.register_storage(self._storage)
 
-        self._collate_fn = (
-            collate_fn
-            if collate_fn is not None
-            else _get_default_collate(
-                self._storage, _is_tensordict=isinstance(self, TensorDictReplayBuffer)
-            )
-        )
+        self._get_collate_fn(collate_fn)
         self._pin_memory = pin_memory
 
         self._prefetch = bool(prefetch)
@@ -201,6 +195,43 @@ def __init__(
             )
         self._batch_size = batch_size
 
+    def _get_collate_fn(self, collate_fn):
+        """Assigns ``self._collate_fn``; ``None`` selects the default collate for the storage."""
+        if collate_fn is None:
+            # The default is chosen from the storage and from whether this is a tensordict buffer.
+            collate_fn = _get_default_collate(
+                self._storage, _is_tensordict=isinstance(self, TensorDictReplayBuffer)
+            )
+        self._collate_fn = collate_fn
+
+    def set_storage(self, storage: Storage, collate_fn: Callable | None = None):
+        """Sets a new storage in the replay buffer and returns the previous storage.
+
+        Args:
+            storage (Storage): the new storage for the buffer.
+            collate_fn (callable, optional): if provided, the collate_fn is set to this
+                value. Otherwise it is reset to a default value.
+
+        """
+        prev_storage, self._storage = self._storage, storage
+        # Re-bind the writer, as ``__init__`` does, so writes do not target the old storage.
+        self._writer.register_storage(self._storage)
+        self._get_collate_fn(collate_fn)
+        return prev_storage
+
+    def set_writer(self, writer: Writer):
+        """Swaps in ``writer`` as the buffer's writer and returns the one it replaces."""
+        prev_writer, self._writer = self._writer, writer
+        # The incoming writer must be registered against the storage held by the buffer.
+        self._writer.register_storage(self._storage)
+        return prev_writer
+
+    def set_sampler(self, sampler: Sampler):
+        """Swaps in ``sampler`` as the buffer's sampler and returns the one it replaces."""
+        # Tuple assignment reads the old sampler before the attribute is overwritten.
+        prev_sampler, self._sampler = self._sampler, sampler
+        return prev_sampler
+
     def __len__(self) -> int:
         with self._replay_lock:
             return len(self._storage)
diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
index 05baa2eaee1..3460f6ed51c 100644
--- a/torchrl/data/replay_buffers/samplers.py
+++ b/torchrl/data/replay_buffers/samplers.py
@@ -524,6 +524,14 @@ class SliceSampler(Sampler):
             trajectory (or episode). Defaults to ``("next", "done")``.
         traj_key (NestedKey, optional): the key indicating the trajectories.
             Defaults to ``"episode"`` (commonly used across datasets in TorchRL).
+        ends (torch.Tensor, optional): a 1d boolean tensor containing the end of run signals.
+            To be used whenever the ``end_key`` or ``traj_key`` is expensive to get,
+            or when this signal is readily available. Must be used with ``cache_values=True``
+            and cannot be used in conjunction with ``end_key`` or ``traj_key``.
+        trajectories (torch.Tensor, optional): a 1d integer tensor containing the run ids.
+            To be used whenever the ``end_key`` or ``traj_key`` is expensive to get,
+            or when this signal is readily available. Must be used with ``cache_values=True``
+            and cannot be used in conjunction with ``end_key`` or ``traj_key``.
         cache_values (bool, optional): to be used with static datasets.
             Will cache the start and end signal of the trajectory.
         truncated_key (NestedKey, optional): If not ``None``, this argument
@@ -612,19 +620,12 @@ def __init__(
         slice_len: int = None,
         end_key: NestedKey | None = None,
         traj_key: NestedKey | None = None,
+        ends: torch.Tensor | None = None,
+        trajectories: torch.Tensor | None = None,
         cache_values: bool = False,
         truncated_key: NestedKey | None = ("next", "truncated"),
         strict_length: bool = True,
     ) -> object:
-        if end_key is None:
-            end_key = ("next", "done")
-        if traj_key is None:
-            traj_key = "episode"
-        if not ((num_slices is None) ^ (slice_len is None)):
-            raise TypeError(
-                "Either num_slices or slice_len must be not None, and not both. "
-                f"Got num_slices={num_slices} and slice_len={slice_len}."
-            )
         self.num_slices = num_slices
         self.slice_len = slice_len
         self.end_key = end_key
@@ -635,6 +636,47 @@ def __init__(
         self._uses_data_prefix = False
         self.strict_length = strict_length
         self._cache = {}
+        if trajectories is not None:
+            # Precomputed run ids: reject conflicting arguments, then cache the boundaries.
+            if traj_key is not None or end_key is not None:
+                raise RuntimeError(
+                    "`trajectories` and `end_key` or `traj_key` are exclusive arguments."
+                )
+            if ends is not None:
+                raise RuntimeError("trajectories and ends are exclusive arguments.")
+            if not cache_values:
+                raise RuntimeError(
+                    "To be used, trajectories requires `cache_values` to be set to `True`."
+                )
+            vals = self._find_start_stop_traj(trajectory=trajectories)
+            self._cache["stop-and-length"] = vals
+        elif ends is not None:
+            # ``trajectories`` is necessarily ``None`` here (the branch above handles it),
+            # so only the key arguments and ``cache_values`` need to be validated.
+            if traj_key is not None or end_key is not None:
+                raise RuntimeError(
+                    "`ends` and `end_key` or `traj_key` are exclusive arguments."
+                )
+            if not cache_values:
+                raise RuntimeError(
+                    "To be used, ends requires `cache_values` to be set to `True`."
+                )
+            vals = self._find_start_stop_traj(end=ends)
+            self._cache["stop-and-length"] = vals
+        else:
+            # Key-based lookup: apply the documented defaults (``"episode"`` for ``traj_key``).
+            if end_key is None:
+                end_key = ("next", "done")
+            if traj_key is None:
+                traj_key = "episode"
+            self.end_key = end_key
+            self.traj_key = traj_key
+
+        if not ((num_slices is None) ^ (slice_len is None)):
+            raise TypeError(
+                "Either num_slices or slice_len must be not None, and not both. "
+                f"Got num_slices={num_slices} and slice_len={slice_len}."
+            )
 
     @staticmethod
     def _find_start_stop_traj(*, trajectory=None, end=None):
@@ -696,16 +738,24 @@ def _get_stop_and_length(self, storage, fallback=True):
                 # In the future, this may be deprecated, and we don't want to mess
                 # with the keys provided by the user so we fall back on a proxy to
                 # the traj key.
-                try:
-                    trajectory = storage._storage.get(self._used_traj_key)
-                except KeyError:
-                    trajectory = storage._storage.get(("_data", self.traj_key))
-                    # cache that value for future use
-                    self._used_traj_key = ("_data", self.traj_key)
-                self._uses_data_prefix = (
-                    isinstance(self._used_traj_key, tuple)
-                    and self._used_traj_key[0] == "_data"
-                )
+                if isinstance(storage, TensorStorage):
+                    try:
+                        trajectory = storage._storage.get(self._used_traj_key)
+                    except KeyError:
+                        trajectory = storage._storage.get(("_data", self.traj_key))
+                        # cache that value for future use
+                        self._used_traj_key = ("_data", self.traj_key)
+                    self._uses_data_prefix = (
+                        isinstance(self._used_traj_key, tuple)
+                        and self._used_traj_key[0] == "_data"
+                    )
+                else:
+                    try:
+                        trajectory = storage[:].get(self.traj_key)
+                    except Exception as err:
+                        raise RuntimeError(
+                            "Could not get a tensordict out of the storage, which is required for SliceSampler to compute the trajectories."
+                        ) from err
                 vals = self._find_start_stop_traj(trajectory=trajectory[: len(storage)])
                 if self.cache_values:
                     self._cache["stop-and-length"] = vals
@@ -722,16 +772,24 @@ def _get_stop_and_length(self, storage, fallback=True):
                 # In the future, this may be deprecated, and we don't want to mess
                 # with the keys provided by the user so we fall back on a proxy to
                 # the traj key.
-                try:
-                    done = storage._storage.get(self._used_end_key)
-                except KeyError:
-                    done = storage._storage.get(("_data", self.end_key))
-                    # cache that value for future use
-                    self._used_end_key = ("_data", self.end_key)
-                self._uses_data_prefix = (
-                    isinstance(self._used_end_key, tuple)
-                    and self._used_end_key[0] == "_data"
-                )
+                if isinstance(storage, TensorStorage):
+                    try:
+                        done = storage._storage.get(self._used_end_key)
+                    except KeyError:
+                        done = storage._storage.get(("_data", self.end_key))
+                        # cache that value for future use
+                        self._used_end_key = ("_data", self.end_key)
+                    self._uses_data_prefix = (
+                        isinstance(self._used_end_key, tuple)
+                        and self._used_end_key[0] == "_data"
+                    )
+                else:
+                    try:
+                        done = storage[:].get(self.end_key)
+                    except Exception as err:
+                        raise RuntimeError(
+                            "Could not get a tensordict out of the storage, which is required for SliceSampler to compute the trajectories."
+                        ) from err
                 vals = self._find_start_stop_traj(end=done.squeeze())[: len(storage)]
                 if self.cache_values:
                     self._cache["stop-and-length"] = vals
@@ -760,11 +818,6 @@ def _adjusted_batch_size(self, batch_size):
         return seq_length, num_slices
 
     def sample(self, storage: Storage, batch_size: int) -> Tuple[torch.Tensor, dict]:
-        if not isinstance(storage, TensorStorage):
-            raise RuntimeError(
-                f"{type(self)} can only sample from TensorStorage subclasses, got {type(storage)} instead."
-            )
-
         # pick up as many trajs as we need
         start_idx, stop_idx, lengths = self._get_stop_and_length(storage)
         seq_length, num_slices = self._adjusted_batch_size(batch_size)
@@ -889,6 +942,14 @@ class SliceSamplerWithoutReplacement(SliceSampler, SamplerWithoutReplacement):
             trajectory (or episode). Defaults to ``("next", "done")``.
         traj_key (NestedKey, optional): the key indicating the trajectories.
             Defaults to ``"episode"`` (commonly used across datasets in TorchRL).
+        ends (torch.Tensor, optional): a 1d boolean tensor containing the end of run signals.
+            To be used whenever the ``end_key`` or ``traj_key`` is expensive to get,
+            or when this signal is readily available. This sampler always caches these
+            values. Cannot be used in conjunction with ``end_key`` or ``traj_key``.
+        trajectories (torch.Tensor, optional): a 1d integer tensor containing the run ids.
+            To be used whenever the ``end_key`` or ``traj_key`` is expensive to get,
+            or when this signal is readily available. This sampler always caches these
+            values. Cannot be used in conjunction with ``end_key`` or ``traj_key``.
         truncated_key (NestedKey, optional): If not ``None``, this argument
             indicates where a truncated signal should be written in the output
             data. This is used to indicate to value estimators where the provided
@@ -973,6 +1034,8 @@ def __init__(
         drop_last: bool = False,
         end_key: NestedKey | None = None,
         traj_key: NestedKey | None = None,
+        ends: torch.Tensor | None = None,
+        trajectories: torch.Tensor | None = None,
         truncated_key: NestedKey | None = ("next", "truncated"),
         strict_length: bool = True,
         shuffle: bool = True,
@@ -986,6 +1049,8 @@ def __init__(
             cache_values=True,
             truncated_key=truncated_key,
             strict_length=strict_length,
+            ends=ends,
+            trajectories=trajectories,
         )
         SamplerWithoutReplacement.__init__(self, drop_last=drop_last, shuffle=shuffle)