From 9fbf53b94616dc5cba6397b4701f6c0feb7d8b64 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 20 Mar 2023 18:07:52 +0000 Subject: [PATCH 01/89] init --- torchrl/collectors/collectors.py | 5 +- torchrl/data/postprocs/postprocs.py | 8 +- torchrl/modules/tensordict_module/actors.py | 3 +- tutorials/sphinx-tutorials/coding_ddpg.py | 756 +++++++++----------- 4 files changed, 345 insertions(+), 427 deletions(-) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index 9e2640522ca..d7dabbf70fc 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -750,7 +750,10 @@ def rollout(self) -> TensorDictBase: self._tensordict_out.lock() self._step_and_maybe_reset() - if self.interruptor is not None and self.interruptor.collection_stopped(): + if ( + self.interruptor is not None + and self.interruptor.collection_stopped() + ): break return self._tensordict_out diff --git a/torchrl/data/postprocs/postprocs.py b/torchrl/data/postprocs/postprocs.py index 26cdc470824..c157fd81977 100644 --- a/torchrl/data/postprocs/postprocs.py +++ b/torchrl/data/postprocs/postprocs.py @@ -153,6 +153,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: """ tensordict = tensordict.clone(False) done = tensordict.get(("next", "done")) + truncated = tensordict.get( + ("next", "truncated"), torch.zeros((), dtype=done.dtype, device=done.device) + ) + done = done | truncated # we'll be using the done states to index the tensordict. # if the shapes don't match we're in trouble. @@ -175,10 +179,6 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: "(trailing singleton dimension excluded)." ) from err - truncated = tensordict.get( - ("next", "truncated"), torch.zeros((), dtype=done.dtype, device=done.device) - ) - done = done | truncated mask = tensordict.get(("collector", "mask"), None) reward = tensordict.get(("next", "reward")) *batch, T = tensordict.batch_size diff --git a/torchrl/modules/tensordict_module/actors.py b/torchrl/modules/tensordict_module/actors.py index bca7a34090d..1fe6a32c5e5 100644 --- a/torchrl/modules/tensordict_module/actors.py +++ b/torchrl/modules/tensordict_module/actors.py @@ -6,7 +6,7 @@ from typing import Optional, Sequence, Tuple, Union import torch -from tensordict.nn import TensorDictModuleWrapper +from tensordict.nn import get_functional, TensorDictModuleWrapper from torch import nn from torchrl.data.tensor_specs import ( @@ -911,6 +911,7 @@ def __init__( policy_operator, value_operator, ) + get_functional(self) def get_policy_operator(self) -> SafeSequential: """Returns a stand-alone policy operator that maps an observation to an action.""" diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 5a4c9e5f4b0..f2ef9a292ae 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -1,25 +1,28 @@ # -*- coding: utf-8 -*- """ -Coding DDPG using TorchRL -========================= +TorchRL objectives: Coding a DDPG loss +====================================== **Author**: `Vincent Moens `_ """ ############################################################################## -# This tutorial will guide you through the steps to code DDPG from scratch. +# TorchRL separates the training of RL algorithms in various pieces that will be +# assembled in your training script: the environment, the data collection and +# storage, the model and finally the loss function. 
# +# TorchRL losses (or "objectives") are stateful objects that contain the +# trainable parameters (policy and value models). +# This tutorial will guide you through the steps to code a loss from the ground up +# using torchrl. +# +# To this aim, we will be focusing on DDPG, which is a relatively straightforward +# algorithm to code. # DDPG (`Deep Deterministic Policy Gradient _`_) # is a simple continuous control algorithm. It consists in learning a # parametric value function for an action-observation pair, and # then learning a policy that outputs actions that maximise this value # function given a certain observation. # -# This tutorial is more than the PPO tutorial: it covers -# multiple topics that were left aside. We strongly advise the reader to go -# through the PPO tutorial first before trying out this one. The goal is to -# show how flexible torchrl is when it comes to writing scripts that can cover -# multiple use cases. -# # Key learnings: # # - how to build an environment in TorchRL, including transforms @@ -30,6 +33,10 @@ # - how to store trajectories (and not transitions) in your replay buffer); # - and finally how to evaluate your model. # +# This tutorial assumes that you have completed the PPO tutorial which gives +# an overview of the TorchRL components. +# +# # This tutorial assumes the reader is familiar with some of TorchRL primitives, # such as :class:`tensordict.TensorDict` and # :class:`tensordict.nn.TensorDictModules`, although it should be @@ -46,6 +53,10 @@ # sphinx_gallery_start_ignore import warnings +from typing import Tuple + +from torchrl.objectives import LossModule +from torchrl.objectives.value import TDEstimate, TDLambdaEstimate warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore @@ -58,6 +69,7 @@ import tqdm from matplotlib import pyplot as plt from tensordict.nn import TensorDictModule +from tensordict.tensordict import TensorDict, TensorDictBase from torch import nn, optim from torchrl.collectors import MultiaSyncDataCollector from torchrl.data import CompositeSpec, TensorDictReplayBuffer @@ -76,21 +88,256 @@ from torchrl.envs.transforms import RewardScaling, TransformedEnv from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.modules import ( + Actor, + ActorCriticWrapper, MLP, OrnsteinUhlenbeckProcessWrapper, ProbabilisticActor, ValueOperator, ) from torchrl.modules.distributions.continuous import TanhDelta -from torchrl.objectives.utils import hold_out_net +from torchrl.objectives.utils import ( + distance_loss, + hold_out_net, + hold_out_params, + SoftUpdate, +) from torchrl.trainers import Recorder ############################################################################### +# TorchRL LossModule +# ------------------ +# +# The ``__init__`` method +# ~~~~~~~~~~~~~~~~~~~~~~~ +# +# The parent class of all losses is :class:`torchrl.objectives.LossModule`. +# As many other components of the library, its :meth:`__call__` method expects +# as input a :class:`tensordict.TensorDict` instance sampled from an expenrience +# replay buffer. Using this format makes it possible to re-use the module across +# modalities, or in complex settings where the model needs to read multiple +# entries for instance. +# +# To keep the tutorial as didactic as we can, we'll be displaying each method +# of the class independently and we'll be populating the class at a later stage. +# +# Let us start with the :meth:`__init__` method. 
DDPG aims at a simple goal: +# training a policy to output actions that maximise the value predicted by +# a value network. Hence, our loss module needs to receive two networks in its +# constructor: an actor and a value networks. We expect both of these to be +# tensordict-compatible objects, such as :class:`tensordict.nn.TensorDictModule`. +# +# The crucial step of the :meth:`LossModule.__init__` method is the call to +# :meth:`LossModule.convert_to_functional`. This method will extract the +# parameters from the module and convert it to a functional module. +# The reason TorchRL does this is that RL algorithms often execute the same +# model with different sets of parameters, called "trainable" and "target" parameters. +# The "trainable" parameters are those that the optimizer needs to fit. The +# "target" parameters are usually a copy of the formers with some time lag +# (absolute or diluted through a moving average). These target parameters +# are used to compute the value associated with the next observation. +# One the advantages of using a set of target parameters for the value model +# that do not match exactly the current configuration is that they provide +# a pessimistic bound on the value function being computed. +# Pay attention to the ``create_target_params`` keyword argument below: this +# argument tells the :meth:`torchrl.objectives.LossModule.convert_to_functional` +# method to create a set of target parameters in the loss module to be used +# for target value computation. If this is set to ``False`` (see the actor network +# for instance) the ``target_actor_network_params`` attribute will still be +# accessible but this will just return a detached version of the actor parameters. +# +# Later, we will see how the target parameters should be updated in TorchRL. +# +# We also incorporate an advantage module. This will be used to compute the +# next state value using our value network. We'll see later in this tutorial +# how various advantage modules can be used. If none is provided, we'll +# be using the TD(lambda) method. +# + + +def _init( + self, + actor_network: TensorDictModule, + value_network: TensorDictModule, + advantage="td(lambda)", +) -> None: + super(type(self), self).__init__() + + self.convert_to_functional( + actor_network, + "actor_network", + create_target_params=False, + ) + self.convert_to_functional( + value_network, + "value_network", + create_target_params=True, + compare_against=list(actor_network.parameters()), + ) + + self.actor_in_keys = actor_network.in_keys + + # Since the value we'll be using is based on the actor and value network, + # we put them together in a single actor-critic container. + actor_critic = ActorCriticWrapper(actor_network, value_network) + if advantage == "td(lambda)": + advantage_module = TDLambdaEstimate( + gamma=0.99, + lmbda=0.95, + value_network=actor_critic, + value_key="state_action_value", + ) + elif advantage == "td(0)": + advantage_module = TDEstimate( + gamma=0.99, value_network=actor_critic, value_key="state_action_value" + ) + else: + raise NotImplementedError("advantage must be one of 'td(lambda)' or 'td(0)'.") + self.advantage = advantage + self.advantage_module = advantage_module + + self.loss_funtion = "l2" + + +############################################################################### +# The actor loss method +# ~~~~~~~~~~~~~~~~~~~~~ +# +# The central piece of an RL algorithm is the training loss for the actor. 
+# In the case of DDPG, this function is quite simple: we just need to compute +# the value associated with an action computed using the policy and optimize +# the actor weights to maximise this value. +# +# When computing this value, we must make sure to take the value parameters out +# of the graph, otherwise the actor and value loss will be mixed up. +# For this, the :func:`torchrl.objectives.utils.hold_out_params` function +# can be used. + + +def _loss_actor( + self, + tensordict, +) -> torch.Tensor: + td_copy = tensordict.select(*self.actor_in_keys).detach() + # Get an action from the actor network + td_copy = self.actor_network( + td_copy, + params=self.actor_network_params, + ) + # get the value associated with that action + with hold_out_params(self.value_network_params) as params: + td_copy = self.value_network( + td_copy, + params=params, + ) + return -td_copy.get("state_action_value") + + +############################################################################### +# The value loss method +# ~~~~~~~~~~~~~~~~~~~~~ +# +# We now need to optimize our value network parameters. +# To do this, we will rely on the advantage module provided during +# the loss construction. + + +def _loss_value( + self, + tensordict, +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + td_copy = tensordict.detach() + + # we manually reconstruct the parameters of the actor-critic, where the first + # set of parameters belongs to the actor and the second to the value function. + params = TensorDict( + { + "module": { + "0": self.actor_network_params.detach(), + "1": self.value_network_params, + } + }, + batch_size=self.target_actor_network_params.batch_size, + device=self.target_actor_network_params.device, + ) + target_params = TensorDict( + { + "module": { + "0": self.target_actor_network_params, + "1": self.target_value_network_params, + } + }, + batch_size=self.target_actor_network_params.batch_size, + device=self.target_actor_network_params.device, + ) + with set_exploration_mode("mode"): + self.advantage_module(td_copy, params=params, target_params=target_params) + target_value = td_copy.get(self.advantage_module.value_target_key) + pred_val = td_copy.get("state_action_value") + # td_error = pred_val - target_value + loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) + + return loss_value, (pred_val - target_value).pow(2), pred_val, target_value + + +############################################################################### +# Putting things together in a forward call +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# The only missing piece is the forward method, which will glue together the +# value and actor loss, collect the cost values and write them in a tensordict +# delivered to the user. 
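+#
+# As a purely indicative sketch (assuming ``loss`` is an instance of the
+# ``DDPGLoss`` class assembled below and ``batch`` is a tensordict sampled
+# from our replay buffer), the returned tensordict could be consumed as
+# follows::
+#
+#     loss_td = loss(batch)
+#     total_loss = loss_td["loss_actor"] + loss_td["loss_value"]
+#     total_loss.backward()
+#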
+ + +def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: + if not input_tensordict.device == self.device: + raise RuntimeError( + f"Got device={input_tensordict.device} but " + f"actor_network.device={self.device} (self.device={self.device})" + ) + + loss_value, td_error, pred_val, target_value = self.loss_value( + input_tensordict, + ) + td_error = td_error.detach() + td_error = td_error.unsqueeze(input_tensordict.ndimension()) + if input_tensordict.device is not None: + td_error = td_error.to(input_tensordict.device) + input_tensordict.set( + "td_error", + td_error, + inplace=True, + ) + loss_actor = self.loss_actor(input_tensordict) + return TensorDict( + source={ + "loss_actor": loss_actor.mean(), + "loss_value": loss_value.mean(), + "pred_value": pred_val.mean().detach(), + "target_value": target_value.mean().detach(), + "pred_value_max": pred_val.max().detach(), + "target_value_max": target_value.max().detach(), + }, + batch_size=[], + ) + + +class DDPGLoss(LossModule): + __init__ = _init + forward = _forward + loss_value = _loss_value + loss_actor = _loss_actor + + +############################################################################### +# Now that we have our loss, we can use it to train a policy to solve a +# control task. +# # Environment # ----------- # # In most algorithms, the first thing that needs to be taken care of is the -# construction of the environmet as it conditions the remainder of the +# construction of the environment as it conditions the remainder of the # training script. # # For this example, we will be using the ``"cheetah"`` task. The goal is to make @@ -118,7 +365,7 @@ # # env = GymEnv("HalfCheetah-v4", from_pixels=True, pixels_only=True) # -# We write a :func:`make_env` helper funciton that will create an environment +# We write a :func:`make_env` helper function that will create an environment # with either one of the two backends considered above (dm-control or gym). # @@ -155,7 +402,7 @@ def make_env(): ############################################################################### # Transforms -# ^^^^^^^^^^ +# ~~~~~~~~~~ # # Now that we have a base environment, we may want to modify its representation # to make it more policy-friendly. In TorchRL, transforms are appended to the @@ -232,7 +479,7 @@ def make_transformed_env( ############################################################################### # Normalization of the observations -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # To compute the normalizing statistics, we run an arbitrary number of random # steps in the environment and compute the mean and standard deviation of the @@ -256,7 +503,7 @@ def get_env_stats(): ############################################################################### # Parallel execution -# ^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~ # # The following helper function allows us to run environments in parallel. # Running environments in parallel can significantly speed up the collection @@ -314,27 +561,19 @@ def make_t_env(): # Building the model # ------------------ # -# We now turn to the setup of the model and loss function. DDPG requires a +# We now turn to the setup of the model. As we have seen, DDPG requires a # value network, trained to estimate the value of a state-action pair, and a # parametric actor that learns how to select actions that maximize this value. -# In this tutorial, we will be using two independent networks for these -# components. 
# # Recall that building a torchrl module requires two steps: # -# - writing the :class:`torch.nn.Module` that will be used as network +# - writing the :class:`torch.nn.Module` that will be used as network, # - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the # data flow is handled by specifying the input and output keys. # # In more complex scenarios, :class:`tensordict.nn.TensorDictSequential` can # also be used. # -# In :func:`make_ddpg_actor`, we use a :class:`torchrl.modules.ProbabilisticActor` -# object to wrap our policy network. Since DDPG is a deterministic algorithm, -# this is not strictly necessary. We rely on this class to map the output -# action to the appropriate domain. Alternatively, one could perfectly use a -# non-linearity such as :class:`torch.tanh` to map the output to the right -# domain. # # The Q-Value network is wrapped in a :class:`torchrl.modules.ValueOperator` # that automatically sets the ``out_keys`` to ``"state_action_value`` for q-value @@ -357,36 +596,34 @@ def make_ddpg_actor( proof_environment.transform[2].load_state_dict(transform_state_dict) env_specs = proof_environment.specs - out_features = env_specs["input_spec"]["action"].shape[0] + in_features = env_specs["output_spec"]["observation"]["observation_vector"].shape[ + -1 + ] + out_features = env_specs["input_spec"]["action"].shape[-1] actor_net = MLP( + in_features=in_features, + out_features=out_features, num_cells=[num_cells] * num_layers, activation_class=nn.Tanh, - out_features=out_features, + activate_last_layer=True, # with this option on, we use a Tanh map as a last layer, thereby constraining the action to the [-1; 1] domain ) in_keys = ["observation_vector"] - out_keys = ["param"] - - actor_module = TensorDictModule(actor_net, in_keys=in_keys, out_keys=out_keys) + out_keys = ["action"] - # We use a ProbabilisticActor to make sure that we map the network output - # to the right space using a TanhDelta distribution. 
- actor = ProbabilisticActor( - module=actor_module, - in_keys=["param"], + actor = Actor( + actor_net, + in_keys=in_keys, + out_keys=out_keys, spec=CompositeSpec(action=env_specs["input_spec"]["action"]), - safe=True, - distribution_class=TanhDelta, - distribution_kwargs={ - "min": env_specs["input_spec"]["action"].space.minimum, - "max": env_specs["input_spec"]["action"].space.maximum, - }, ).to(device) q_net = MLP( + in_features=in_features + + out_features, # receives an action and an observation as input + out_features=1, num_cells=[num_cells] * num_layers, activation_class=nn.Tanh, - out_features=1, ) in_keys = in_keys + ["action"] @@ -395,15 +632,6 @@ def make_ddpg_actor( module=q_net, ).to(device) - # init: since we have lazy layers, we should run the network - # once to initialize them - with torch.no_grad(), set_exploration_mode("random"): - td = proof_environment.fake_tensordict() - td = td.expand((*td.shape, 2)) - td = td.to(device) - actor(td) - qnet(td) - return actor, qnet @@ -484,7 +712,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Environment -# ^^^^^^^^^^^ +# ~~~~~~~~~~~ # The backend can be gym or dm_control backend = "gym" @@ -509,7 +737,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Collection -# ^^^^^^^^^^ +# ~~~~~~~~~~ # We will execute the policy on cuda if available device = ( @@ -521,8 +749,10 @@ def make_replay_buffer(buffer_size, prefetch=3): # Total frames we will use during training. Scale up to 500K - 1M for a more # meaningful training -total_frames = 5000 // frame_skip -# Number of frames returned by the collector at each iteration of the outer loop +total_frames = 10000 // frame_skip + +# Number of frames returned by the collector at each iteration of the outer loop. 
+# We expect batches from the collector to have a shape [env_per_collector, frames_per_batch // env_per_collector] frames_per_batch = env_per_collector * 1000 // frame_skip max_frames_per_traj = 1000 // frame_skip init_random_frames = 0 @@ -535,7 +765,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Optimizer and optimization -# ^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ lr = 5e-4 weight_decay = 0.0 @@ -545,7 +775,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Model -# ^^^^^ +# ~~~~~ gamma = 0.99 tau = 0.005 # Decay factor for the target network @@ -556,12 +786,13 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Replay buffer -# ^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~ # If True, a Prioritized replay buffer will be used prb = True # Number of frames stored in the buffer -buffer_size = min(total_frames, 1000000 // frame_skip) +traj_len_collector = frames_per_batch // env_per_collector +buffer_size = min(total_frames, 1_000_000 // traj_len_collector) buffer_scratch_dir = "/tmp/" seed = 0 @@ -582,13 +813,13 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Normalization stats -# ^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~ transform_state_dict = get_env_stats() ############################################################################### # Models: policy and q-value network -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actor, qnet = make_ddpg_actor( transform_state_dict=transform_state_dict, @@ -597,10 +828,17 @@ def make_replay_buffer(buffer_size, prefetch=3): if device == torch.device("cpu"): actor.share_memory() -############################################################################### -# We create a copy of the q-value network to be used as target network -qnet_target = deepcopy(qnet).requires_grad_(False) +############################################################################### +# Loss module +# ~~~~~~~~~~~ +# We build our loss module with the actor and qnet we've just created. +# Because we have target parameters to update, we _must_ create a target network +# updater. +# +loss_module = DDPGLoss(actor, qnet) +target_net_updater = SoftUpdate(loss_module, eps=0.98) +target_net_updater.init_() ############################################################################### # The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` @@ -615,7 +853,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Parallel environment creation -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # We pass the stats computed earlier to normalize the output of our # environment: @@ -626,7 +864,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Data collector -# ^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~ # # TorchRL provides specialized classes to help you collect data by executing # the policy in the environment. 
These "data collectors" iteratively compute @@ -684,9 +922,8 @@ def make_replay_buffer(buffer_size, prefetch=3): reset_at_each_iter=False, postproc=multistep, split_trajs=True, - devices=[device, device], # device for execution + device=device, # device for execution storing_devices=[device, device], # device where data will be stored and passed - pin_memory=False, update_at_each_batch=False, exploration_mode="random", ) @@ -695,83 +932,38 @@ def make_replay_buffer(buffer_size, prefetch=3): ############################################################################### # Replay buffer -# ^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~ # replay_buffer = make_replay_buffer(buffer_size, prefetch=3) ############################################################################### # Recorder -# ^^^^^^^^ +# ~~~~~~~~ recorder = make_recorder(actor_model_explore, transform_state_dict) ############################################################################### # Optimizer -# ^^^^^^^^^ +# ~~~~~~~~~ # # Finally, we will use the Adam optimizer for the policy and value network, # with the same learning rate for both. -optimizer_actor = optim.Adam(actor.parameters(), lr=lr, weight_decay=weight_decay) -optimizer_qnet = optim.Adam(qnet.parameters(), lr=lr, weight_decay=weight_decay) +optimizer = optim.Adam(loss_module.parameters(), lr=lr, weight_decay=weight_decay) total_collection_steps = total_frames // frames_per_batch -scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer_actor, T_max=total_collection_steps -) -scheduler2 = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer_qnet, T_max=total_collection_steps +scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=total_collection_steps ) ############################################################################### # Time to train the policy # ------------------------ # -# Some notes about the following training loop: -# -# - :func:`torchrl.objectives.utils.hold_out_net` is a TorchRL context manager -# that temporarily sets :func:`torch.Tensor.requires_grad_()` to False for -# a designated set of network parameters. This is used to -# prevent :func:`torch.Tensor.backward()`` from writing gradients on -# parameters that need not to be differentiated given the loss at hand. -# - The value network is designed using the -# :class:`torchrl.modules.ValueOperator` subclass from -# :class:`tensordict.nn.TensorDictModule` class. As explained earlier, -# this class will write a ``"state_action_value"`` entry if one of its -# ``in_keys`` is named ``"action"``, otherwise it will assume that only the -# state-value is returned and the output key will simply be ``"state_value"``. -# In the case of DDPG, the value if of the state-action pair, -# hence the ``"state_action_value"`` will be used. -# - The :func:`torchrl.envs.utils.step_mdp(tensordict)` helper function is the -# equivalent of the ``obs = next_obs`` command found in multiple RL -# algorithms. It will return a new :class:`tensordict.TensorDict` instance -# that contains all the data that will need to be used in the next iteration. -# This makes it possible to pass this new tensordict to the policy or -# value network. -# - When using prioritized replay buffer, a priority key is added to the -# sampled tensordict (named ``"td_error"`` by default). Then, this -# TensorDict will be fed back to the replay buffer using the -# :func:`torchrl.data.replay_buffers.TensorDictReplayBuffer.update_tensordict_priority` -# method. 
Under the hood, this method will read the index present in the -# TensorDict as well as the priority value, and update its list of priorities -# at these indices. -# - TorchRL provides optimized versions of the loss functions (such as this one) -# where one only needs to pass a sampled tensordict and obtains a dictionary -# of losses and metadata in return (see :mod:`torchrl.objectives` for more -# context). Here we write the full loss function in the optimization loop -# for transparency. -# Similarly, the target network updates are written explicitly but -# TorchRL provides a couple of dedicated classes for this -# (see :class:`torchrl.objectives.SoftUpdate` and -# :class:`torchrl.objectives.HardUpdate`). -# - After each collection of data, we call :func:`collector.update_policy_weights_()`, -# which will update the policy network weights on the data collector. If the -# code is executed on cpu or with a single cuda device, this part can be -# omitted. If the collector is executed on another device, then its weights -# must be synced with those on the main, training process and this method -# should be incorporated in the training loop (ideally early in the loop in -# async settings, and at the end of it in sync settings). +# The training loop is pretty straightforward now that we have built all the +# modules we need. +# rewards = [] rewards_eval = [] @@ -794,13 +986,7 @@ def make_replay_buffer(buffer_size, prefetch=3): pbar.update(tensordict.numel()) # extend the replay buffer with the new data - if ("collector", "mask") in tensordict.keys(True): - # if multi-step, a mask is present to help filter padded values - current_frames = tensordict["collector", "mask"].sum() - tensordict = tensordict[tensordict.get(("collector", "mask"))] - else: - tensordict = tensordict.view(-1) - current_frames = tensordict.numel() + current_frames = tensordict.numel() collected_frames += current_frames replay_buffer.extend(tensordict.cpu()) @@ -810,49 +996,22 @@ def make_replay_buffer(buffer_size, prefetch=3): # sample from replay buffer sampled_tensordict = replay_buffer.sample(batch_size).clone() - # compute loss for qnet and backprop - with hold_out_net(actor): - # get next state value - next_tensordict = step_mdp(sampled_tensordict) - qnet_target(actor(next_tensordict)) - next_value = next_tensordict["state_action_value"] - assert not next_value.requires_grad - value_est = ( - sampled_tensordict["next", "reward"] - + gamma * (1 - sampled_tensordict["next", "done"].float()) * next_value + # Compute loss + loss_dict = loss_module(sampled_tensordict) + + # optimize + loss_val = sum( + value for key, value in loss_dict.items() if key.startswith("loss") ) - value = qnet(sampled_tensordict)["state_action_value"] - value_loss = (value - value_est).pow(2).mean() - # we write the td_error in the sampled_tensordict for priority update - # because the indices of the samples is tracked in sampled_tensordict - # and the replay buffer will know which priorities to update. - sampled_tensordict["td_error"] = (value - value_est).pow(2).detach() - value_loss.backward() - - optimizer_qnet.step() - optimizer_qnet.zero_grad() - - # compute loss for actor and backprop: - # the actor must maximise the state-action value, hence the loss - # is the neg value of this. 
- sampled_tensordict_actor = sampled_tensordict.select(*actor.in_keys) - with hold_out_net(qnet): - qnet(actor(sampled_tensordict_actor)) - actor_loss = -sampled_tensordict_actor["state_action_value"] - actor_loss.mean().backward() - - optimizer_actor.step() - optimizer_actor.zero_grad() - - # update qnet_target params - for (p_in, p_dest) in zip(qnet.parameters(), qnet_target.parameters()): - p_dest.data.copy_(tau * p_in.data + (1 - tau) * p_dest.data) - for (b_in, b_dest) in zip(qnet.buffers(), qnet_target.buffers()): - b_dest.data.copy_(tau * b_in.data + (1 - tau) * b_dest.data) + loss_val.backward() + optimizer.step() + optimizer.zero_grad() # update priority if prb: replay_buffer.update_tensordict_priority(sampled_tensordict) + # update target network + target_net_updater.step() rewards.append( ( @@ -873,8 +1032,7 @@ def make_replay_buffer(buffer_size, prefetch=3): # update the exploration strategy actor_model_explore.step(current_frames) if collected_frames >= init_random_frames: - scheduler1.step() - scheduler2.step() + scheduler.step() collector.shutdown() del collector @@ -898,259 +1056,15 @@ def make_replay_buffer(buffer_size, prefetch=3): plt.tight_layout() ############################################################################### -# Sampling trajectories and using TD(lambda) -# ------------------------------------------ -# -# TD(lambda) is known to be less biased than the regular TD-error we used in -# the previous example. To use it, however, we need to sample trajectories and -# not single transitions. -# -# We modify the previous example to make this possible. -# -# The first modification consists in building a replay buffer that stores -# trajectories (and not transitions). -# -# Specifically, we'll collect trajectories of (at most) -# 250 steps (note that the total trajectory length is actually 1000 frames, but -# we collect batches of 500 transitions obtained over 2 environments running in -# parallel, hence only 250 steps per trajectory are collected at any given -# time). 
Hence, we'll divide our replay buffer size by 250: - -buffer_size = 100000 // frame_skip // 250 -print("the new buffer size is", buffer_size) -batch_size_traj = max(4, batch_size // 250) -print("the new batch size for trajectories is", batch_size_traj) - -n_steps_forward = 0 # disable multi-step for simplicity - -############################################################################### -# The following code is identical to the initialization we made earlier: - -torch.manual_seed(seed) -np.random.seed(seed) - -# get stats for normalization -transform_state_dict = get_env_stats() - -# Actor and qnet instantiation -actor, qnet = make_ddpg_actor( - transform_state_dict=transform_state_dict, - device=device, -) -if device == torch.device("cpu"): - actor.share_memory() - -# Target network -qnet_target = deepcopy(qnet).requires_grad_(False) - -# Exploration wrappers: -actor_model_explore = OrnsteinUhlenbeckProcessWrapper( - actor, - annealing_num_steps=annealing_frames, -).to(device) -if device == torch.device("cpu"): - actor_model_explore.share_memory() - -# Environment setting: -create_env_fn = parallel_env_constructor( - transform_state_dict=transform_state_dict, -) -# Batch collector: -collector = MultiaSyncDataCollector( - create_env_fn=[create_env_fn, create_env_fn], - policy=actor_model_explore, - total_frames=total_frames, - max_frames_per_traj=max_frames_per_traj, - frames_per_batch=frames_per_batch, - init_random_frames=init_random_frames, - reset_at_each_iter=False, - postproc=None, - split_trajs=False, - devices=[device, device], # device for execution - storing_devices=[device, device], # device where data will be stored and passed - seed=None, - pin_memory=False, - update_at_each_batch=False, - exploration_mode="random", -) -collector.set_seed(seed) - -# Replay buffer: -replay_buffer = make_replay_buffer(buffer_size, prefetch=0) - -# trajectory recorder -recorder = make_recorder(actor_model_explore, transform_state_dict) - -# Optimizers -optimizer_actor = optim.Adam(actor.parameters(), lr=lr, weight_decay=weight_decay) -optimizer_qnet = optim.Adam(qnet.parameters(), lr=lr, weight_decay=weight_decay) -total_collection_steps = total_frames // frames_per_batch - -scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer_actor, T_max=total_collection_steps -) -scheduler2 = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer_qnet, T_max=total_collection_steps -) - -############################################################################### -# The training loop needs to be slightly adapted. -# First, whereas before extending the replay buffer we used to flatten the -# collected data, this won't be the case anymore. To understand why, let's -# check the output shape of the data collector: - -for data in collector: - print(data.shape) - break - -############################################################################### -# We see that our data has shape ``[2, 250]`` as expected: 2 envs, each -# returning 250 frames. +# Conclusion +# ---------- # -# Let's import the td_lambda function: +# In this tutorial, we have learnt how to code a loss module in TorchRL given +# the concrete example of DDPG. # - -from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate - -lmbda = 0.95 - -############################################################################### -# The training loop is roughly the same as before, with the exception that we -# don't flatten the collected data. 
Also, the sampling from the replay buffer -# is slightly different: We will collect at minimum four trajectories, compute -# the returns (TD(lambda)), then sample from these the values we'll be using -# to compute gradients. This ensures that do not have batches that are -# 'too big' but still compute an accurate return. +# The key takeaways are: # - -rewards = [] -rewards_eval = [] - -# Main loop -norm_factor_training = ( - sum(gamma**i for i in range(n_steps_forward)) if n_steps_forward else 1 -) - -collected_frames = 0 -# # if tqdm is to be used -# pbar = tqdm.tqdm(total=total_frames) -r0 = None -for i, tensordict in enumerate(collector): - - # update weights of the inference policy - collector.update_policy_weights_() - - if r0 is None: - r0 = tensordict["next", "reward"].mean().item() - - # extend the replay buffer with the new data - current_frames = tensordict.numel() - collected_frames += current_frames - replay_buffer.extend(tensordict.cpu()) - - # optimization steps - if collected_frames >= init_random_frames: - for _ in range(update_to_data): - # sample from replay buffer - sampled_tensordict = replay_buffer.sample(batch_size_traj) - # reset the batch size temporarily, and exclude index - # whose shape is incompatible with the new size - index = sampled_tensordict.get("index") - sampled_tensordict.exclude("index", inplace=True) - - # compute loss for qnet and backprop - with hold_out_net(actor): - # get next state value - next_tensordict = step_mdp(sampled_tensordict) - qnet_target(actor(next_tensordict.view(-1))).view( - sampled_tensordict.shape - ) - next_value = next_tensordict["state_action_value"] - assert not next_value.requires_grad - - # This is the crucial part: we'll compute the TD(lambda) - # instead of a simple single step estimate - done = sampled_tensordict["next", "done"] - reward = sampled_tensordict["next", "reward"] - value = qnet(sampled_tensordict.view(-1)).view(sampled_tensordict.shape)[ - "state_action_value" - ] - advantage = vec_td_lambda_advantage_estimate( - gamma, lmbda, value, next_value, reward, done - ) - # we sample from the values we have computed - rand_idx = torch.randint(0, advantage.numel(), (batch_size,)) - value_loss = advantage.view(-1)[rand_idx].pow(2).mean() - - # we write the td_error in the sampled_tensordict for priority update - # because the indices of the samples is tracked in sampled_tensordict - # and the replay buffer will know which priorities to update. - value_loss.backward() - - optimizer_qnet.step() - optimizer_qnet.zero_grad() - - # compute loss for actor and backprop: the actor must maximise the state-action value, hence the loss is the neg value of this. 
- sampled_tensordict_actor = sampled_tensordict.select(*actor.in_keys) - with hold_out_net(qnet): - qnet(actor(sampled_tensordict_actor.view(-1))).view( - sampled_tensordict.shape - ) - actor_loss = -sampled_tensordict_actor["state_action_value"] - actor_loss.view(-1)[rand_idx].mean().backward() - - optimizer_actor.step() - optimizer_actor.zero_grad() - - # update qnet_target params - for (p_in, p_dest) in zip(qnet.parameters(), qnet_target.parameters()): - p_dest.data.copy_(tau * p_in.data + (1 - tau) * p_dest.data) - for (b_in, b_dest) in zip(qnet.buffers(), qnet_target.buffers()): - b_dest.data.copy_(tau * b_in.data + (1 - tau) * b_dest.data) - - # update priority - sampled_tensordict.batch_size = [batch_size_traj] - sampled_tensordict["td_error"] = advantage.detach().pow(2).mean(1) - sampled_tensordict["index"] = index - if prb: - replay_buffer.update_tensordict_priority(sampled_tensordict) - - rewards.append( - ( - i, - tensordict["next", "reward"].mean().item() - / norm_factor_training - / frame_skip, - ) - ) - td_record = recorder(None) - if td_record is not None: - rewards_eval.append((i, td_record["r_evaluation"].item())) - # if len(rewards_eval): - # pbar.set_description(f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}") - - # update the exploration strategy - actor_model_explore.step(current_frames) - if collected_frames >= init_random_frames: - scheduler1.step() - scheduler2.step() - -collector.shutdown() -del create_env_fn -del collector - -############################################################################### -# We can observe that using TD(lambda) made our results considerably more -# stable for a similar training speed: +# - How to use the :class:`torchrl.objectives.LossModule` class to register components; +# - How to use (or not) a target network, and how to update its parameters; +# - How to create an optimizer associated with a loss module. # -# **Note**: As already mentioned above, to get a more reasonable performance, -# use a greater value for ``total_frames`` e.g. 1000000. 
- -plt.figure() -plt.plot(*zip(*rewards), label="training") -plt.plot(*zip(*rewards_eval), label="eval") -plt.legend() -plt.xlabel("iter") -plt.ylabel("reward") -plt.tight_layout() -plt.title("TD-labmda DDPG results") From 5488d4d7effa5de75fb4ad3369cbf4cf925f4a92 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 20 Mar 2023 18:08:18 +0000 Subject: [PATCH 02/89] lint --- tutorials/sphinx-tutorials/coding_ddpg.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index f2ef9a292ae..dc568b63fe2 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -61,10 +61,8 @@ warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore -from copy import deepcopy import numpy as np -import torch import torch.cuda import tqdm from matplotlib import pyplot as plt @@ -86,19 +84,16 @@ from torchrl.envs.libs.dm_control import DMControlEnv from torchrl.envs.libs.gym import GymEnv from torchrl.envs.transforms import RewardScaling, TransformedEnv -from torchrl.envs.utils import set_exploration_mode, step_mdp +from torchrl.envs.utils import set_exploration_mode from torchrl.modules import ( Actor, ActorCriticWrapper, MLP, OrnsteinUhlenbeckProcessWrapper, - ProbabilisticActor, ValueOperator, ) -from torchrl.modules.distributions.continuous import TanhDelta from torchrl.objectives.utils import ( distance_loss, - hold_out_net, hold_out_params, SoftUpdate, ) From 93deeebfe9b2b47122675587461ad63b2e9fce29 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 20 Mar 2023 18:10:19 +0000 Subject: [PATCH 03/89] amend --- tutorials/sphinx-tutorials/coding_ddpg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index dc568b63fe2..c35bd87c41e 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -146,7 +146,10 @@ # We also incorporate an advantage module. This will be used to compute the # next state value using our value network. We'll see later in this tutorial # how various advantage modules can be used. If none is provided, we'll -# be using the TD(lambda) method. +# be using the TD(lambda) method, which is usually preferable to TD(0). +# Notice that this choice makes it necessary that the tensordict provided +# has its last dimension representing the time span of the experiment (ie +# our replay buffer must be populated using non-flatten data). # From f511020d81b551cf230dc40b9ad3938e8bf32daf Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 21 Mar 2023 10:22:45 +0000 Subject: [PATCH 04/89] dqn (1) --- tutorials/sphinx-tutorials/coding_dqn.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index a50ac553b21..8455811898c 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -1,16 +1,25 @@ # -*- coding: utf-8 -*- """ -Coding a pixel-based DQN using TorchRL -====================================== +TorchRL trainer: A DQN example +============================== **Author**: `Vincent Moens `_ """ ############################################################################## -# This tutorial will guide you through the steps to code DQN to solve the -# CartPole task from scratch. 
DQN -# (`Deep Q-Learning `_) was +# TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle +# your training loop. The trainer executes a nested loop where the outer loop +# is the data collection and the inner loop consumes this data or some data +# retrieved from the replay buffer to train the model. +# At various points in this training loop, hooks can be attached and executed at +# given intervals. +# +# In this tutorial, we will be using the trainer class to train a DQN algorithm +# to solve the CartPole task from scratch. +# +# DQN (`Deep Q-Learning `_) was # the founding work in deep reinforcement learning. +# # On a high level, the algorithm is quite simple: Q-learning consists in learning a table of # state-action values in such a way that, when encountering any particular state, # we know which action to pick just by searching for the action with the From 2586b74164a78ee8aa0353be4cb7532867840a2b Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 21 Mar 2023 11:58:31 +0000 Subject: [PATCH 05/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 392 ++++++++++++----------- 1 file changed, 201 insertions(+), 191 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 8455811898c..331128eee9a 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -17,13 +17,41 @@ # In this tutorial, we will be using the trainer class to train a DQN algorithm # to solve the CartPole task from scratch. # +# Main takeaways: +# +# - Building a trainer with its essential components: data collector, loss +# module, replay buffer and optimizer. +# - Adding hooks to a trainer, such as loggers, target network updaters and such. +# +# We will also focus on some other aspects of the library: +# +# - how to build an environment in TorchRL, including transforms (e.g. data +# normalization, frame concatenation, resizing and turning to grayscale) +# and parallel execution. Unlike what we did in the +# `DDPG tutorial `_, we +# will normalize the pixels and not the state vector. +# - how to design a ``QValueActor``, i.e. an actor that estimates the action +# values and picks up the action with the highest estimated return; +# - how to collect data from your environment efficiently and store them +# in a replay buffer; +# - how to store trajectories (and not transitions) in your replay buffer), +# and how to estimate returns using TD(lambda); +# - and finally how to evaluate your model. +# +# **Prerequisites**: We encourage you to get familiar with torchrl through the +# `PPO tutorial `_ first. +# +# DQN +# --- +# # DQN (`Deep Q-Learning `_) was # the founding work in deep reinforcement learning. # -# On a high level, the algorithm is quite simple: Q-learning consists in learning a table of -# state-action values in such a way that, when encountering any particular state, -# we know which action to pick just by searching for the action with the -# highest value. This simple setting requires the actions and states to be +# On a high level, the algorithm is quite simple: Q-learning consists in +# learning a table of state-action values in such a way that, when +# encountering any particular state, we know which action to pick just by +# searching for the action with the highest value. This simple setting +# requires the actions and states to be # discrete, otherwise a lookup table cannot be built. 
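+#
+# As a toy, purely indicative sketch (not part of the code used in this
+# tutorial), a tabular Q-learning update over a ``[n_states, n_actions]``
+# table could look like this, with ``gamma`` the discount factor and ``lr``
+# a learning rate::
+#
+#     action = q_table[state].argmax()
+#     target = reward + gamma * q_table[next_state].max()
+#     q_table[state, action] += lr * (target - q_table[state, action])
+#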
# # DQN uses a neural network that encodes a map from the state-action space to @@ -44,32 +72,6 @@ # .. figure:: /_static/img/cartpole_demo.gif # :alt: Cart Pole # -# **Prerequisites**: We encourage you to get familiar with torchrl through the -# `PPO tutorial `_ first. -# This tutorial is more complex and full-fleshed, but it may be . -# -# In this tutorial, you will learn: -# -# - how to build an environment in TorchRL, including transforms (e.g. data -# normalization, frame concatenation, resizing and turning to grayscale) -# and parallel execution. Unlike what we did in the -# `DDPG tutorial `_, we -# will normalize the pixels and not the state vector. -# - how to design a QValue actor, i.e. an actor that estimates the action -# values and picks up the action with the highest estimated return; -# - how to collect data from your environment efficiently and store them -# in a replay buffer; -# - how to store trajectories (and not transitions) in your replay buffer), -# and how to estimate returns using TD(lambda); -# - how to make a module functional and use ; -# - and finally how to evaluate your model. -# -# This tutorial assumes the reader is familiar with some of TorchRL -# primitives, such as :class:`tensordict.TensorDict` and -# :class:`tensordict.TensorDictModules`, although it -# should be sufficiently transparent to be understood without a deep -# understanding of these classes. -# # We do not aim at giving a SOTA implementation of the algorithm, but rather # to provide a high-level illustration of TorchRL features in the context # of this algorithm. @@ -120,102 +122,6 @@ def is_notebook() -> bool: ############################################################################### -# Hyperparameters -# --------------- -# -# Let's start with our hyperparameters. The following setting should work well -# in practice, and the performance of the algorithm should hopefully not be -# too sensitive to slight variations of these. - -device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - -############################################################################### -# Optimizer -# ^^^^^^^^^ - -# the learning rate of the optimizer -lr = 2e-3 -# the beta parameters of Adam -betas = (0.9, 0.999) -# Optimization steps per batch collected (aka UPD or updates per data) -n_optim = 8 - -############################################################################### -# DQN parameters -# ^^^^^^^^^^^^^^ - -############################################################################### -# gamma decay factor -gamma = 0.99 - -############################################################################### -# lambda decay factor (see second the part with TD(:math:`\lambda`) -lmbda = 0.95 - -############################################################################### -# Smooth target network update decay parameter. -# This loosely corresponds to a 1/(1-tau) interval with hard target network -# update -tau = 0.005 - -############################################################################### -# Data collection and replay buffer -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Values to be used for proper training have been commented. -# -# Total frames collected in the environment. In other implementations, the -# user defines a maximum number of episodes. -# This is harder to do with our data collectors since they return batches -# of N collected frames, where N is a constant. 
-# However, one can easily get the same restriction on number of episodes by -# breaking the training loop when a certain number -# episodes has been collected. -total_frames = 5000 # 500000 - -############################################################################### -# Random frames used to initialize the replay buffer. -init_random_frames = 100 # 1000 - -############################################################################### -# Frames in each batch collected. -frames_per_batch = 32 # 128 - -############################################################################### -# Frames sampled from the replay buffer at each optimization step -batch_size = 32 # 256 - -############################################################################### -# Size of the replay buffer in terms of frames -buffer_size = min(total_frames, 100000) - -############################################################################### -# Number of environments run in parallel in each data collector -num_workers = 2 # 8 -num_collectors = 2 # 4 - - -############################################################################### -# Environment and exploration -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# -# We set the initial and final value of the epsilon factor in Epsilon-greedy -# exploration. -# Since our policy is deterministic, exploration is crucial: without it, the -# only source of randomness would be the environment reset. - -eps_greedy_val = 0.1 -eps_greedy_val_env = 0.005 - -############################################################################### -# To speed up learning, we set the bias of the last layer of our value network -# to a predefined value (this is not mandatory) -init_bias = 2.0 - -############################################################################### -# **Note**: for fast rendering of the tutorial ``total_frames`` hyperparameter -# was set to a very low number. To get a reasonable performance, use a greater -# value e.g. 500000 -# # Building the environment # ------------------------ # @@ -283,7 +189,7 @@ def make_env(parallel=False, observation_norm_state_dict=None): ############################################################################### # Compute normalizing constants -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # To normalize images, we don't want to normalize each pixel independently # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` @@ -292,16 +198,16 @@ def make_env(parallel=False, observation_norm_state_dict=None): # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that # not all dimensions disappear in the process: -test_env = make_env() -test_env.transform[-1].init_stats( +def get_norm_const(): + test_env = make_env() + test_env.transform[-1].init_stats( num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) ) -observation_norm_state_dict = test_env.transform[-1].state_dict() - -############################################################################### -# let's check that normalizing constants have a size of ``[C, 1, 1]`` where -# ``C=4`` (because of :class:`torchrl.envs.CatFrames`). -print(observation_norm_state_dict) + observation_norm_state_dict = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). 
+ print(observation_norm_state_dict) + return observation_norm_state_dict ############################################################################### # Building the model (Deep Q-network) @@ -324,7 +230,7 @@ def make_env(parallel=False, observation_norm_state_dict=None): # in the input :class:`tensordict.TensorDict`. # # Target parameters -# ^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~ # # Many off-policy RL algorithms use the concept of "target parameters" when it # comes to estimate the value of the ``t+1`` state or state-action pair. @@ -335,7 +241,7 @@ def make_env(parallel=False, observation_norm_state_dict=None): # in similar algorithms. # # Functionalizing modules -# ^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~ # # One of the features of torchrl is its usage of functional modules: as the # same architecture is often used with multiple sets of parameters (e.g. @@ -401,40 +307,12 @@ def make_model(dummy_env): return factor, actor, actor_explore, params, params_target -( - factor, - actor, - actor_explore, - params, - params_target, -) = make_model(test_env) - -############################################################################### -# We represent the parameters and targets as flat structures, but unflattening -# them is quite easy: - -params_flat = params.flatten_keys(".") - -############################################################################### -# We will be using the adam optimizer: - -optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) - -############################################################################### -# We create a test environment for evaluation of the policy: - -test_env = make_env( - parallel=False, observation_norm_state_dict=observation_norm_state_dict -) -# sanity check: -print(actor_explore(test_env.reset())) - ############################################################################### # Collecting and storing data # --------------------------- # # Replay buffers -# ^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~ # # Replay buffers play a central role in off-policy RL algorithms such as DQN. # They constitute the dataset we will be sampling from during training. @@ -450,14 +328,16 @@ def make_model(dummy_env): # The only requirement of this storage is that the data passed to it at write # time must always have the same shape. -replay_buffer = TensorDictReplayBuffer( - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, -) +def get_replay_buffer(buffer_size, n_optim): + replay_buffer = TensorDictReplayBuffer( + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, + ) + return replay_buffer ############################################################################### # Data collector -# ^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~ # # As in `PPO ` and # `DDPG `, we will be using @@ -485,27 +365,157 @@ def make_model(dummy_env): # out training loop must account for. For simplicity, we set the devices to # the same value for all sub-collectors. -data_collector = MultiaSyncDataCollector( - # ``num_collectors`` collectors, each with an set of `num_workers` environments being run in parallel - [ - make_env( - parallel=True, observation_norm_state_dict=observation_norm_state_dict - ), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, - total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. 
Below is an example of - # heterogeneous devices - devices=[device] * num_collectors, - storing_devices=[device] * num_collectors, - # devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], - # storing_devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], - split_trajs=False, +def get_collector(observation_norm_state_dict, num_collectors, actor_explore, frames_per_batch, total_frames, device): + data_collector = MultiaSyncDataCollector( + [ + make_env( + parallel=True, observation_norm_state_dict=observation_norm_state_dict + ), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, + ) + return data_collector + + + + +############################################################################### +# Hyperparameters +# --------------- +# +# Let's start with our hyperparameters. The following setting should work well +# in practice, and the performance of the algorithm should hopefully not be +# too sensitive to slight variations of these. + +device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" + +############################################################################### +# Optimizer +# ~~~~~~~~~ + +# the learning rate of the optimizer +lr = 2e-3 +# the beta parameters of Adam +betas = (0.9, 0.999) +# Optimization steps per batch collected (aka UPD or updates per data) +n_optim = 8 + +############################################################################### +# DQN parameters +# ~~~~~~~~~~~~~~ + +############################################################################### +# gamma decay factor +gamma = 0.99 + +############################################################################### +# lambda decay factor (see second the part with TD(:math:`\lambda`) +lmbda = 0.95 + +############################################################################### +# Smooth target network update decay parameter. +# This loosely corresponds to a 1/(1-tau) interval with hard target network +# update +tau = 0.005 + +############################################################################### +# Data collection and replay buffer +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Values to be used for proper training have been commented. +# +# Total frames collected in the environment. In other implementations, the +# user defines a maximum number of episodes. +# This is harder to do with our data collectors since they return batches +# of N collected frames, where N is a constant. +# However, one can easily get the same restriction on number of episodes by +# breaking the training loop when a certain number +# episodes has been collected. +total_frames = 5000 # 500000 + +############################################################################### +# Random frames used to initialize the replay buffer. +init_random_frames = 100 # 1000 + +############################################################################### +# Frames in each batch collected. 
+frames_per_batch = 32 # 128 + +############################################################################### +# Frames sampled from the replay buffer at each optimization step +batch_size = 32 # 256 + +############################################################################### +# Size of the replay buffer in terms of frames +buffer_size = min(total_frames, 100000) + +############################################################################### +# Number of environments run in parallel in each data collector +num_workers = 2 # 8 +num_collectors = 2 # 4 + + +############################################################################### +# Environment and exploration +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# We set the initial and final value of the epsilon factor in Epsilon-greedy +# exploration. +# Since our policy is deterministic, exploration is crucial: without it, the +# only source of randomness would be the environment reset. + +eps_greedy_val = 0.1 +eps_greedy_val_env = 0.005 + +############################################################################### +# To speed up learning, we set the bias of the last layer of our value network +# to a predefined value (this is not mandatory) +init_bias = 2.0 + +############################################################################### +# .. note:: +# For fast rendering of the tutorial ``total_frames`` hyperparameter +# was set to a very low number. To get a reasonable performance, use a greater +# value e.g. 500000 +# + + +( + factor, + actor, + actor_explore, + params, + params_target, +) = make_model(test_env) + +############################################################################### +# We represent the parameters and targets as flat structures, but unflattening +# them is quite easy: + +params_flat = params.flatten_keys(".") + +############################################################################### +# We will be using the adam optimizer: + +optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) + +############################################################################### +# We create a test environment for evaluation of the policy: + +test_env = make_env( + parallel=False, observation_norm_state_dict=observation_norm_state_dict ) +# sanity check: +print(actor_explore(test_env.reset())) ############################################################################### # Training loop of a regular DQN From b330b166384f4e7405af150f13be1fc7e5e6da7f Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 21 Mar 2023 13:26:26 +0000 Subject: [PATCH 06/89] edit training dqn --- tutorials/sphinx-tutorials/coding_dqn.py | 1928 +++++++++++----------- 1 file changed, 961 insertions(+), 967 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 331128eee9a..cc6bc232513 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -76,982 +76,976 @@ # to provide a high-level illustration of TorchRL features in the context # of this algorithm. 
-# sphinx_gallery_start_ignore -import warnings -from collections import defaultdict - -warnings.filterwarnings("ignore") -# sphinx_gallery_end_ignore - -import torch -import tqdm -from functorch import vmap -from matplotlib import pyplot as plt -from tensordict import TensorDict -from tensordict.nn import get_functional -from torch import nn -from torchrl.collectors import MultiaSyncDataCollector -from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer -from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter -from torchrl.envs.libs.gym import GymEnv -from torchrl.envs.transforms import ( - CatFrames, - CatTensors, - Compose, - GrayScale, - ObservationNorm, - Resize, - ToTensorImage, - TransformedEnv, -) -from torchrl.envs.utils import set_exploration_mode, step_mdp -from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor +if __name__ == "__main__": + # sphinx_gallery_start_ignore + import warnings + from collections import defaultdict + + from torchrl.objectives import DQNLoss + from torchrl.trainers import Trainer + + warnings.filterwarnings("ignore") + # sphinx_gallery_end_ignore + + import torch + import tqdm + from functorch import vmap + from matplotlib import pyplot as plt + from tensordict import TensorDict + from tensordict.nn import get_functional + from torch import nn + from torchrl.collectors import MultiaSyncDataCollector + from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer + from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter + from torchrl.envs.libs.gym import GymEnv + from torchrl.envs.transforms import ( + CatFrames, + CatTensors, + Compose, + GrayScale, + ObservationNorm, + Resize, + ToTensorImage, + TransformedEnv, + ) + from torchrl.envs.utils import set_exploration_mode, step_mdp + from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor -def is_notebook() -> bool: - try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython + def is_notebook() -> bool: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return False # Terminal running IPython + else: + return False # Other type (?) + except NameError: + return False # Probably standard Python interpreter + + + ############################################################################### + # Building the environment + # ------------------------ + # + # Our environment builder has two arguments: + # + # - ``parallel``: determines whether multiple environments have to be run in + # parallel. We stack the transforms after the + # :class:`torchrl.envs.ParallelEnv` to take advantage + # of vectorization of the operations on device, although this would + # technically work with every single environment attached to its own set of + # transforms. + # - ``observation_norm_state_dict`` will contain the normalizing constants for + # the :class:`torchrl.envs.ObservationNorm` tranform. 
+ # + # We will be using five transforms: + # + # - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 + # tensor in a floating point tensor in the ``[0, 1]`` space with shape + # ``[C, W, H]``; + # - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; + # - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; + # - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; + # - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of + # successive frames (``N=4``) in a single tensor along the channel dimension. + # This is useful as a single image does not carry information about the + # motion of the cartpole. Some memory about past observations and actions + # is needed, either via a recurrent neural network or using a stack of + # frames. + # - :class:`torchrl.envs.ObservationNorm` which will normalize our observations + # given some custom summary statistics. + # + + + def make_env(parallel=False, observation_norm_state_dict=None, frame_skip=1): + if observation_norm_state_dict is None: + observation_norm_state_dict = {"standard_normal": True} + if parallel: + base_env = ParallelEnv( + num_workers, + EnvCreator( + lambda: GymEnv( + "CartPole-v1", from_pixels=True, pixels_only=True, device=device, frame_skip=frame_skip + ) + ), + ) else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter - - -############################################################################### -# Building the environment -# ------------------------ -# -# Our environment builder has two arguments: -# -# - ``parallel``: determines whether multiple environments have to be run in -# parallel. We stack the transforms after the -# :class:`torchrl.envs.ParallelEnv` to take advantage -# of vectorization of the operations on device, although this would -# technically work with every single environment attached to its own set of -# transforms. -# - ``observation_norm_state_dict`` will contain the normalizing constants for -# the :class:`torchrl.envs.ObservationNorm` tranform. -# -# We will be using five transforms: -# -# - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 -# tensor in a floating point tensor in the ``[0, 1]`` space with shape -# ``[C, W, H]``; -# - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; -# - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; -# - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; -# - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of -# successive frames (``N=4``) in a single tensor along the channel dimension. -# This is useful as a single image does not carry information about the -# motion of the cartpole. Some memory about past observations and actions -# is needed, either via a recurrent neural network or using a stack of -# frames. -# - :class:`torchrl.envs.ObservationNorm` which will normalize our observations -# given some custom summary statistics. 
-# - + base_env = GymEnv( + "CartPole-v1", from_pixels=True, pixels_only=True, device=device, frame_skip=frame_skip, + ) -def make_env(parallel=False, observation_norm_state_dict=None): - if observation_norm_state_dict is None: - observation_norm_state_dict = {"standard_normal": True} - if parallel: - base_env = ParallelEnv( - num_workers, - EnvCreator( - lambda: GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device - ) + env = TransformedEnv( + base_env, + Compose( + StepCounter(), # to count the steps of each trajectory + ToTensorImage(), + RewardScaling(loc=0.0, scale=0.1), + GrayScale(), + Resize(64, 64), + CatFrames(4, in_keys=["pixels"], dim=-3), + ObservationNorm(in_keys=["pixels"], **observation_norm_state_dict), ), ) - else: - base_env = GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device - ) - - env = TransformedEnv( - base_env, - Compose( - StepCounter(), # to count the steps of each trajectory - ToTensorImage(), - RewardScaling(loc=0.0, scale=0.1), - GrayScale(), - Resize(64, 64), - CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **observation_norm_state_dict), - ), - ) - return env - - -############################################################################### -# Compute normalizing constants -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# To normalize images, we don't want to normalize each pixel independently -# with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` -# shaped loc and scale parameters. We will be using the ``reduce_dim`` argument -# of :func:`torchrl.envs.ObservationNorm.init_stats` to instruct which -# dimensions must be reduced, and the ``keep_dims`` parameter to ensure that -# not all dimensions disappear in the process: - -def get_norm_const(): - test_env = make_env() - test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) -) - observation_norm_state_dict = test_env.transform[-1].state_dict() - # let's check that normalizing constants have a size of ``[C, 1, 1]`` where - # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). - print(observation_norm_state_dict) - return observation_norm_state_dict - -############################################################################### -# Building the model (Deep Q-network) -# ----------------------------------- -# -# The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` -# object which is a simple CNN followed by a two-layer MLP. The only trick used -# here is that the action values (i.e. left and right action value) are -# computed using -# -# .. math:: -# -# val = b(obs) + v(obs) - \mathbb{E}[v(obs)] -# -# where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a -# :math:`\# obs \rightarrow num_actions` function. -# -# Our network is wrapped in a :class:`torchrl.modules.QValueActor`, which will read the state-action -# values, pick up the one with the maximum value and write all those results -# in the input :class:`tensordict.TensorDict`. -# -# Target parameters -# ~~~~~~~~~~~~~~~~~ -# -# Many off-policy RL algorithms use the concept of "target parameters" when it -# comes to estimate the value of the ``t+1`` state or state-action pair. -# The target parameters are lagged copies of the model parameters. Because -# their predictions mismatch those of the current model configuration, they -# help learning by putting a pessimistic bound on the value being estimated. 
-# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous -# in similar algorithms. -# -# Functionalizing modules -# ~~~~~~~~~~~~~~~~~~~~~~~ -# -# One of the features of torchrl is its usage of functional modules: as the -# same architecture is often used with multiple sets of parameters (e.g. -# trainable and target parameters), we functionalize the modules and isolate -# the various sets of parameters in separate tensordicts. -# -# To this aim, we use :func:`tensordict.nn.get_functional`, which augments -# our modules with some extra feature that make them compatible with parameters -# passed in the ``TensorDict`` format. - - -def make_model(dummy_env): - cnn_kwargs = { - "num_cells": [32, 64, 64], - "kernel_sizes": [6, 4, 3], - "strides": [2, 2, 1], - "activation_class": nn.ELU, - # This can be used to reduce the size of the last layer of the CNN - # "squeeze_output": True, - # "aggregator_class": nn.AdaptiveAvgPool2d, - # "aggregator_kwargs": {"output_size": (1, 1)}, - } - mlp_kwargs = { - "depth": 2, - "num_cells": [ - 64, - 64, - ], - "activation_class": nn.ELU, - } - net = DuelingCnnDQNet( - dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs - ).to(device) - net.value[-1].bias.data.fill_(init_bias) - - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) - # init actor: because the model is composed of lazy conv/linear layers, - # we must pass a fake batch of data through it to instantiate them. - tensordict = dummy_env.fake_tensordict() - actor(tensordict) - - # Make functional: - # here's an explicit way of creating the parameters and buffer tensordict. - # Alternatively, we could have used `params = make_functional(actor)` from - # tensordict.nn - params = TensorDict({k: v for k, v in actor.named_parameters()}, []) - buffers = TensorDict({k: v for k, v in actor.named_buffers()}, []) - params = params.update(buffers) - params = params.unflatten_keys(".") # creates a nested TensorDict - factor = get_functional(actor) - - # creating the target parameters is fairly easy with tensordict: - params_target = params.clone().detach() - - # we wrap our actor in an EGreedyWrapper for data collection - actor_explore = EGreedyWrapper( - actor, - annealing_num_steps=total_frames, - eps_init=eps_greedy_val, - eps_end=eps_greedy_val_env, - ) - - return factor, actor, actor_explore, params, params_target - - -############################################################################### -# Collecting and storing data -# --------------------------- -# -# Replay buffers -# ~~~~~~~~~~~~~~ -# -# Replay buffers play a central role in off-policy RL algorithms such as DQN. -# They constitute the dataset we will be sampling from during training. -# -# Here, we will use a regular sampling strategy, although a prioritized RB -# could improve the performance significantly. -# -# We place the storage on disk using -# :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This -# storage is created in a lazy manner: it will only be instantiated once the -# first batch of data is passed to it. -# -# The only requirement of this storage is that the data passed to it at write -# time must always have the same shape. 
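# A minimal usage sketch of the storage behaviour described above (the toy
# shapes and capacity are illustrative assumptions, not values taken from this
# tutorial): the memory-mapped buffers are only allocated on the first write,
# and every ``extend`` call afterwards must pass data with the same structure
# and trailing shape.

import torch
from tensordict import TensorDict
from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer

rb = TensorDictReplayBuffer(storage=LazyMemmapStorage(1000))
fake_batch = TensorDict({"pixels": torch.zeros(32, 4, 64, 64)}, batch_size=[32])
rb.extend(fake_batch)  # the first write instantiates the storage on disk
sample = rb.sample(8)  # returns a TensorDict with batch_size [8]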
- -def get_replay_buffer(buffer_size, n_optim): - replay_buffer = TensorDictReplayBuffer( - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, - ) - return replay_buffer - -############################################################################### -# Data collector -# ~~~~~~~~~~~~~~ -# -# As in `PPO ` and -# `DDPG `, we will be using -# a data collector as a dataloader in the outer loop. -# -# We choose the following configuration: we will be running a series of -# parallel environments synchronously in parallel in different collectors, -# themselves running in parallel but asynchronously. -# The advantage of this configuration is that we can balance the amount of -# compute that is executed in batch with what we want to be executed -# asynchronously. We encourage the reader to experiment how the collection -# speed is impacted by modifying the number of collectors (ie the number of -# environment constructors passed to the collector) and the number of -# environment executed in parallel in each collector (controlled by the -# ``num_workers`` hyperparameter). -# -# When building the collector, we can choose on which device we want the -# environment and policy to execute the operations through the ``device`` -# keyword argument. The ``storing_devices`` argument will modify the -# location of the data being collected: if the batches that we are gathering -# have a considerable size, we may want to store them on a different location -# than the device where the computation is happening. For asynchronous data -# collectors such as ours, different storing devices mean that the data that -# we collect won't sit on the same device each time, which is something that -# out training loop must account for. For simplicity, we set the devices to -# the same value for all sub-collectors. - -def get_collector(observation_norm_state_dict, num_collectors, actor_explore, frames_per_batch, total_frames, device): - data_collector = MultiaSyncDataCollector( - [ - make_env( - parallel=True, observation_norm_state_dict=observation_norm_state_dict - ), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, - total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. Below is an example of - # heterogeneous devices - device=device, - storing_device=device, - split_trajs=False, + return env + + + ############################################################################### + # Compute normalizing constants + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # To normalize images, we don't want to normalize each pixel independently + # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` + # shaped loc and scale parameters. We will be using the ``reduce_dim`` argument + # of :func:`torchrl.envs.ObservationNorm.init_stats` to instruct which + # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that + # not all dimensions disappear in the process: + + def get_norm_stats(): + test_env = make_env() + test_env.transform[-1].init_stats( + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) ) - return data_collector - - - - -############################################################################### -# Hyperparameters -# --------------- -# -# Let's start with our hyperparameters. 
The following setting should work well -# in practice, and the performance of the algorithm should hopefully not be -# too sensitive to slight variations of these. - -device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - -############################################################################### -# Optimizer -# ~~~~~~~~~ - -# the learning rate of the optimizer -lr = 2e-3 -# the beta parameters of Adam -betas = (0.9, 0.999) -# Optimization steps per batch collected (aka UPD or updates per data) -n_optim = 8 - -############################################################################### -# DQN parameters -# ~~~~~~~~~~~~~~ - -############################################################################### -# gamma decay factor -gamma = 0.99 - -############################################################################### -# lambda decay factor (see second the part with TD(:math:`\lambda`) -lmbda = 0.95 - -############################################################################### -# Smooth target network update decay parameter. -# This loosely corresponds to a 1/(1-tau) interval with hard target network -# update -tau = 0.005 - -############################################################################### -# Data collection and replay buffer -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Values to be used for proper training have been commented. -# -# Total frames collected in the environment. In other implementations, the -# user defines a maximum number of episodes. -# This is harder to do with our data collectors since they return batches -# of N collected frames, where N is a constant. -# However, one can easily get the same restriction on number of episodes by -# breaking the training loop when a certain number -# episodes has been collected. -total_frames = 5000 # 500000 - -############################################################################### -# Random frames used to initialize the replay buffer. -init_random_frames = 100 # 1000 - -############################################################################### -# Frames in each batch collected. -frames_per_batch = 32 # 128 - -############################################################################### -# Frames sampled from the replay buffer at each optimization step -batch_size = 32 # 256 - -############################################################################### -# Size of the replay buffer in terms of frames -buffer_size = min(total_frames, 100000) - -############################################################################### -# Number of environments run in parallel in each data collector -num_workers = 2 # 8 -num_collectors = 2 # 4 - - -############################################################################### -# Environment and exploration -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# We set the initial and final value of the epsilon factor in Epsilon-greedy -# exploration. -# Since our policy is deterministic, exploration is crucial: without it, the -# only source of randomness would be the environment reset. - -eps_greedy_val = 0.1 -eps_greedy_val_env = 0.005 - -############################################################################### -# To speed up learning, we set the bias of the last layer of our value network -# to a predefined value (this is not mandatory) -init_bias = 2.0 - -############################################################################### -# .. note:: -# For fast rendering of the tutorial ``total_frames`` hyperparameter -# was set to a very low number. 
To get a reasonable performance, use a greater -# value e.g. 500000 -# - - -( - factor, - actor, - actor_explore, - params, - params_target, -) = make_model(test_env) - -############################################################################### -# We represent the parameters and targets as flat structures, but unflattening -# them is quite easy: - -params_flat = params.flatten_keys(".") - -############################################################################### -# We will be using the adam optimizer: - -optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) - -############################################################################### -# We create a test environment for evaluation of the policy: - -test_env = make_env( - parallel=False, observation_norm_state_dict=observation_norm_state_dict -) -# sanity check: -print(actor_explore(test_env.reset())) - -############################################################################### -# Training loop of a regular DQN -# ------------------------------ -# -# We'll start with a simple implementation of DQN where the returns are -# computed without bootstrapping, i.e. -# -# .. math:: -# -# Q_{t}(s, a) = R(s, a) + \gamma * V_{t+1}(s) -# -# where :math:`Q(s, a)` is the Q-value of the current state-action pair, -# :math:`R(s, a)` is the result of the reward function, and :math:`V(s)` is a -# value function that returns 0 for terminating states. -# -# We store the logs in a defaultdict: - -logs_exp1 = defaultdict(list) -prev_traj_count = 0 - -pbar = tqdm.tqdm(total=total_frames) -for j, data in enumerate(data_collector): - current_frames = data.numel() - pbar.update(current_frames) - data = data.view(-1) - - # We store the values on the replay buffer, after placing them on CPU. - # When called for the first time, this will instantiate our storage - # object which will print its content. - replay_buffer.extend(data.cpu()) - - # some logging - if len(logs_exp1["frames"]): - logs_exp1["frames"].append(current_frames + logs_exp1["frames"][-1]) - else: - logs_exp1["frames"].append(current_frames) - - if data["next", "done"].any(): - done = data["next", "done"].squeeze(-1) - logs_exp1["traj_lengths"].append( - data["next", "step_count"][done].float().mean().item() + observation_norm_state_dict = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). + print(observation_norm_state_dict) + return observation_norm_state_dict + + ############################################################################### + # Building the model (Deep Q-network) + # ----------------------------------- + # + # The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` + # object which is a simple CNN followed by a two-layer MLP. The only trick used + # here is that the action values (i.e. left and right action value) are + # computed using + # + # .. math:: + # + # val = b(obs) + v(obs) - \mathbb{E}[v(obs)] + # + # where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a + # :math:`\# obs \rightarrow num_actions` function. + # + # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, which will read the state-action + # values, pick up the one with the maximum value and write all those results + # in the input :class:`tensordict.TensorDict`. 
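# A minimal sketch of the aggregation above in plain PyTorch. ``TinyDuelingHead``
# and the toy feature/action sizes are illustrative assumptions, not the actual
# :class:`torchrl.modules.DuelingCnnDQNet` implementation.

import torch
from torch import nn


class TinyDuelingHead(nn.Module):
    def __init__(self, in_features, num_actions):
        super().__init__()
        self.baseline = nn.Linear(in_features, 1)  # b(obs): state value
        self.advantage = nn.Linear(in_features, num_actions)  # v(obs): per-action term

    def forward(self, obs):
        adv = self.advantage(obs)
        # subtracting the mean advantage makes the decomposition identifiable
        return self.baseline(obs) + adv - adv.mean(dim=-1, keepdim=True)


q_values = TinyDuelingHead(in_features=8, num_actions=2)(torch.randn(5, 8))
print(q_values.shape)  # torch.Size([5, 2])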
+ # + # Target parameters + # ~~~~~~~~~~~~~~~~~ + # + # Many off-policy RL algorithms use the concept of "target parameters" when it + # comes to estimate the value of the ``t+1`` state or state-action pair. + # The target parameters are lagged copies of the model parameters. Because + # their predictions mismatch those of the current model configuration, they + # help learning by putting a pessimistic bound on the value being estimated. + # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous + # in similar algorithms. + # + + def make_model(dummy_env): + cnn_kwargs = { + "num_cells": [32, 64, 64], + "kernel_sizes": [6, 4, 3], + "strides": [2, 2, 1], + "activation_class": nn.ELU, + # This can be used to reduce the size of the last layer of the CNN + # "squeeze_output": True, + # "aggregator_class": nn.AdaptiveAvgPool2d, + # "aggregator_kwargs": {"output_size": (1, 1)}, + } + mlp_kwargs = { + "depth": 2, + "num_cells": [ + 64, + 64, + ], + "activation_class": nn.ELU, + } + net = DuelingCnnDQNet( + dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs + ).to(device) + net.value[-1].bias.data.fill_(init_bias) + + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) + # init actor: because the model is composed of lazy conv/linear layers, + # we must pass a fake batch of data through it to instantiate them. + tensordict = dummy_env.fake_tensordict() + actor(tensordict) + + # we wrap our actor in an EGreedyWrapper for data collection + actor_explore = EGreedyWrapper( + actor, + annealing_num_steps=total_frames, + eps_init=eps_greedy_val, + eps_end=eps_greedy_val_env, ) - # check that we have enough data to start training - if sum(logs_exp1["frames"]) > init_random_frames: - for _ in range(n_optim): - # sample from the RB and send to device - sampled_data = replay_buffer.sample(batch_size) - sampled_data = sampled_data.to(device, non_blocking=True) - - # collect data from RB - reward = sampled_data["next", "reward"].squeeze(-1) - done = sampled_data["next", "done"].squeeze(-1).to(reward.dtype) - action = sampled_data["action"].clone() - - # Compute action value (of the action actually taken) at time t - # By default, TorchRL uses one-hot encodings for discrete actions - sampled_data_out = sampled_data.select(*actor.in_keys) - sampled_data_out = factor(sampled_data_out, params=params) - action_value = sampled_data_out["action_value"] - action_value = (action_value * action.to(action_value.dtype)).sum(-1) - with torch.no_grad(): - # compute best action value for the next step, using target parameters - tdstep = step_mdp(sampled_data) - next_value = factor( - tdstep.select(*actor.in_keys), - params=params_target, - )["chosen_action_value"].squeeze(-1) - exp_value = reward + gamma * next_value * (1 - done) - assert exp_value.shape == action_value.shape - # we use MSE loss but L1 or smooth L1 should also work - error = nn.functional.mse_loss(exp_value, action_value).mean() - error.backward() - - gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) - - optim.step() - optim.zero_grad() - - # update of the target parameters - params_target.apply( - lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), - params.detach(), - inplace=True, - ) - - actor_explore.step(current_frames) - - # Logging - logs_exp1["grad_vals"].append(float(gv)) - logs_exp1["losses"].append(error.item()) - logs_exp1["values"].append(action_value.mean().item()) - logs_exp1["traj_count"].append( - prev_traj_count + data["next", "done"].sum().item() + return 
actor, actor_explore + + + ############################################################################### + # Collecting and storing data + # --------------------------- + # + # Replay buffers + # ~~~~~~~~~~~~~~ + # + # Replay buffers play a central role in off-policy RL algorithms such as DQN. + # They constitute the dataset we will be sampling from during training. + # + # Here, we will use a regular sampling strategy, although a prioritized RB + # could improve the performance significantly. + # + # We place the storage on disk using + # :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This + # storage is created in a lazy manner: it will only be instantiated once the + # first batch of data is passed to it. + # + # The only requirement of this storage is that the data passed to it at write + # time must always have the same shape. + + def get_replay_buffer(buffer_size, n_optim): + replay_buffer = TensorDictReplayBuffer( + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, ) - prev_traj_count = logs_exp1["traj_count"][-1] - - if j % 10 == 0: - with set_exploration_mode("mode"), torch.no_grad(): - # execute a rollout. The `set_exploration_mode("mode")` has no effect here since the policy is deterministic, but we add it for completeness - eval_rollout = test_env.rollout( - max_steps=10000, - policy=actor, - ).cpu() - logs_exp1["traj_lengths_eval"].append(eval_rollout.shape[-1]) - logs_exp1["evals"].append(eval_rollout["next", "reward"].sum().item()) - if len(logs_exp1["mavgs"]): - logs_exp1["mavgs"].append( - logs_exp1["evals"][-1] * 0.05 + logs_exp1["mavgs"][-1] * 0.95 - ) - else: - logs_exp1["mavgs"].append(logs_exp1["evals"][-1]) - logs_exp1["traj_count_eval"].append(logs_exp1["traj_count"][-1]) - pbar.set_description( - f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp1['evals'][-1]: 4.4f}" - ) - - # update policy weights - data_collector.update_policy_weights_() - -############################################################################### -# We write a custom plot function to display the performance of our algorithm -# - - -def plot(logs, name): - plt.figure(figsize=(15, 10)) - plt.subplot(2, 3, 1) - plt.plot( - logs["frames"][-len(logs["evals"]) :], - logs["evals"], - label="return (eval)", - ) - plt.plot( - logs["frames"][-len(logs["mavgs"]) :], - logs["mavgs"], - label="mavg of returns (eval)", - ) - plt.xlabel("frames collected") - plt.ylabel("trajectory length (= return)") - plt.subplot(2, 3, 2) - plt.plot( - logs["traj_count"][-len(logs["evals"]) :], - logs["evals"], - label="return", - ) - plt.plot( - logs["traj_count"][-len(logs["mavgs"]) :], - logs["mavgs"], - label="mavg", - ) - plt.xlabel("trajectories collected") - plt.legend() - plt.subplot(2, 3, 3) - plt.plot(logs["frames"][-len(logs["losses"]) :], logs["losses"]) - plt.xlabel("frames collected") - plt.title("loss") - plt.subplot(2, 3, 4) - plt.plot(logs["frames"][-len(logs["values"]) :], logs["values"]) - plt.xlabel("frames collected") - plt.title("value") - plt.subplot(2, 3, 5) - plt.plot( - logs["frames"][-len(logs["grad_vals"]) :], - logs["grad_vals"], - ) - plt.xlabel("frames collected") - plt.title("grad norm") - if len(logs["traj_lengths"]): - plt.subplot(2, 3, 6) - plt.plot(logs["traj_lengths"]) - plt.xlabel("batches") - plt.title("traj length (training)") - plt.savefig(name) - if is_notebook(): - plt.show() - - -############################################################################### -# The performance of the policy can be measured as 
the length of trajectories. -# As we can see on the results of the :func:`plot` function, the performance -# of the policy increases, albeit slowly. -# -# .. code-block:: python -# -# plot(logs_exp1, "dqn_td0.png") -# -# .. figure:: /_static/img/dqn_td0.png -# :alt: Cart Pole results with TD(0) -# - -print("shutting down") -data_collector.shutdown() -del data_collector - -############################################################################### -# DQN with TD(:math:`\lambda`) -# ---------------------------- -# -# We can improve the above algorithm by getting a better estimate of the -# return, using not only the next state value but the whole sequence of rewards -# and values that follow a particular step. -# -# TorchRL provides a vectorized version of TD(lambda) named -# :func:`torchrl.objectives.value.functional.vec_td_lambda_advantage_estimate`. -# We'll use this to obtain a target value that the value network will be -# trained to match. -# -# The big difference in this implementation is that we'll store entire -# trajectories and not single steps in the replay buffer. This will be done -# automatically as long as we're not "flattening" the tensordict collected: -# by keeping a shape ``[Batch x timesteps]`` and giving this -# to the RB, we'll be creating a replay buffer of size -# ``[Capacity x timesteps]``. - - -from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate - -############################################################################### -# We reset the actor parameters: -# - -( - factor, - actor, - actor_explore, - params, - params_target, -) = make_model(test_env) -params_flat = params.flatten_keys(".") - -optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) -test_env = make_env( - parallel=False, observation_norm_state_dict=observation_norm_state_dict -) -print(actor_explore(test_env.reset())) - -############################################################################### -# Data: Replay buffer and collector -# --------------------------------- -# -# We need to build a new replay buffer of the appropriate size: -# - -max_size = frames_per_batch // num_workers - -replay_buffer = TensorDictReplayBuffer( - storage=LazyMemmapStorage(-(-buffer_size // max_size)), - prefetch=n_optim, -) - -data_collector = MultiaSyncDataCollector( - [ - make_env( - parallel=True, observation_norm_state_dict=observation_norm_state_dict - ), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, - total_frames=total_frames, - exploration_mode="random", - devices=[device] * num_collectors, - storing_devices=[device] * num_collectors, - # devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], - # storing_devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], - split_trajs=False, -) - - -logs_exp2 = defaultdict(list) -prev_traj_count = 0 - -############################################################################### -# Training loop -# ------------- -# -# There are very few differences with the training loop above: -# -# - The tensordict received by the collector is used as-is, without being -# flattened (recall the ``data.view(-1)`` above), to keep the temporal -# relation between consecutive steps. -# - We use :func:`vec_td_lambda_advantage_estimate` to compute the target -# value. 
- -pbar = tqdm.tqdm(total=total_frames) -for j, data in enumerate(data_collector): - current_frames = data.numel() - pbar.update(current_frames) - - replay_buffer.extend(data.cpu()) - if len(logs_exp2["frames"]): - logs_exp2["frames"].append(current_frames + logs_exp2["frames"][-1]) - else: - logs_exp2["frames"].append(current_frames) - - if data["next", "done"].any(): - done = data["next", "done"].squeeze(-1) - logs_exp2["traj_lengths"].append( - data["next", "step_count"][done].float().mean().item() + return replay_buffer + + ############################################################################### + # Data collector + # ~~~~~~~~~~~~~~ + # + # As in `PPO ` and + # `DDPG `, we will be using + # a data collector as a dataloader in the outer loop. + # + # We choose the following configuration: we will be running a series of + # parallel environments synchronously in parallel in different collectors, + # themselves running in parallel but asynchronously. + # The advantage of this configuration is that we can balance the amount of + # compute that is executed in batch with what we want to be executed + # asynchronously. We encourage the reader to experiment how the collection + # speed is impacted by modifying the number of collectors (ie the number of + # environment constructors passed to the collector) and the number of + # environment executed in parallel in each collector (controlled by the + # ``num_workers`` hyperparameter). + # + # When building the collector, we can choose on which device we want the + # environment and policy to execute the operations through the ``device`` + # keyword argument. The ``storing_devices`` argument will modify the + # location of the data being collected: if the batches that we are gathering + # have a considerable size, we may want to store them on a different location + # than the device where the computation is happening. For asynchronous data + # collectors such as ours, different storing devices mean that the data that + # we collect won't sit on the same device each time, which is something that + # out training loop must account for. For simplicity, we set the devices to + # the same value for all sub-collectors. + + def get_collector(observation_norm_state_dict, num_collectors, actor_explore, frames_per_batch, total_frames, device): + data_collector = MultiaSyncDataCollector( + [ + make_env( + parallel=True, observation_norm_state_dict=observation_norm_state_dict + ), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. 
Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, ) - - if sum(logs_exp2["frames"]) > init_random_frames: - for _ in range(n_optim): - sampled_data = replay_buffer.sample(batch_size // max_size) - sampled_data = sampled_data.clone().to(device, non_blocking=True) - - reward = sampled_data["next", "reward"] - done = sampled_data["next", "done"].to(reward.dtype) - action = sampled_data["action"].clone() - - sampled_data_out = sampled_data.select(*actor.in_keys) - sampled_data_out = vmap(factor, (0, None))(sampled_data_out, params) - action_value = sampled_data_out["action_value"] - action_value = (action_value * action.to(action_value.dtype)).sum(-1, True) - with torch.no_grad(): - tdstep = step_mdp(sampled_data) - next_value = vmap(factor, (0, None))( - tdstep.select(*actor.in_keys), params - ) - next_value = next_value["chosen_action_value"] - error = vec_td_lambda_advantage_estimate( - gamma, - lmbda, - action_value, - next_value, - reward, - done, - ).pow(2) - error = error.mean() - error.backward() - - gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) - - optim.step() - optim.zero_grad() - - # update of the target parameters - params_target.apply( - lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), - params.detach(), - inplace=True, - ) - - actor_explore.step(current_frames) - - # Logging - logs_exp2["grad_vals"].append(float(gv)) - - logs_exp2["losses"].append(error.item()) - logs_exp2["values"].append(action_value.mean().item()) - logs_exp2["traj_count"].append( - prev_traj_count + data["next", "done"].sum().item() + return data_collector + + + + + ############################################################################### + # Hyperparameters + # --------------- + # + # Let's start with our hyperparameters. The following setting should work well + # in practice, and the performance of the algorithm should hopefully not be + # too sensitive to slight variations of these. + + device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" + + ############################################################################### + # Optimizer + # ~~~~~~~~~ + + # the learning rate of the optimizer + lr = 2e-3 + # weight decay + wd = 1e-5 + # the beta parameters of Adam + betas = (0.9, 0.999) + # Optimization steps per batch collected (aka UPD or updates per data) + n_optim = 8 + + ############################################################################### + # DQN parameters + # ~~~~~~~~~~~~~~ + + ############################################################################### + # gamma decay factor + gamma = 0.99 + + ############################################################################### + # lambda decay factor (see second the part with TD(:math:`\lambda`) + lmbda = 0.95 + + ############################################################################### + # Smooth target network update decay parameter. + # This loosely corresponds to a 1/(1-tau) interval with hard target network + # update + tau = 0.005 + + ############################################################################### + # Data collection and replay buffer + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Values to be used for proper training have been commented. + # + # Total frames collected in the environment. In other implementations, the + # user defines a maximum number of episodes. + # This is harder to do with our data collectors since they return batches + # of N collected frames, where N is a constant. 
+ # However, one can easily get the same restriction on number of episodes by + # breaking the training loop when a certain number + # episodes has been collected. + total_frames = 4096 # 500000 + + ############################################################################### + # Random frames used to initialize the replay buffer. + init_random_frames = 100 # 1000 + + ############################################################################### + # Frames in each batch collected. + frames_per_batch = 32 # 128 + + ############################################################################### + # Frames sampled from the replay buffer at each optimization step + batch_size = 32 # 256 + + ############################################################################### + # Size of the replay buffer in terms of frames + buffer_size = min(total_frames, 100000) + + ############################################################################### + # Number of environments run in parallel in each data collector + num_workers = 2 # 8 + num_collectors = 2 # 4 + + + ############################################################################### + # Environment and exploration + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # We set the initial and final value of the epsilon factor in Epsilon-greedy + # exploration. + # Since our policy is deterministic, exploration is crucial: without it, the + # only source of randomness would be the environment reset. + + eps_greedy_val = 0.1 + eps_greedy_val_env = 0.005 + + ############################################################################### + # To speed up learning, we set the bias of the last layer of our value network + # to a predefined value (this is not mandatory) + init_bias = 2.0 + + ############################################################################### + # .. note:: + # For fast rendering of the tutorial ``total_frames`` hyperparameter + # was set to a very low number. To get a reasonable performance, use a greater + # value e.g. 500000 + # + + def get_trainer(): + stats = get_norm_stats() + test_env = make_env(parallel=False, observation_norm_state_dict=stats) + # Get model + actor, actor_explore = make_model(test_env) + loss_module = DQNLoss(actor, gamma=0.99) + collector = get_collector(stats, num_collectors, actor_explore, frames_per_batch, total_frames, device) + optimizer = torch.optim.Adam(loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas) + trainer = Trainer( + collector=collector, + total_frames=total_frames, + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=None, + optim_steps_per_batch = n_optim, ) - prev_traj_count = logs_exp2["traj_count"][-1] - if j % 10 == 0: - with set_exploration_mode("mode"), torch.no_grad(): - # execute a rollout. 
The `set_exploration_mode("mode")` has - # no effect here since the policy is deterministic, but we add - # it for completeness - eval_rollout = test_env.rollout( - max_steps=10000, - policy=actor, - ).cpu() - logs_exp2["traj_lengths_eval"].append(eval_rollout.shape[-1]) - logs_exp2["evals"].append(eval_rollout["next", "reward"].sum().item()) - if len(logs_exp2["mavgs"]): - logs_exp2["mavgs"].append( - logs_exp2["evals"][-1] * 0.05 + logs_exp2["mavgs"][-1] * 0.95 - ) - else: - logs_exp2["mavgs"].append(logs_exp2["evals"][-1]) - logs_exp2["traj_count_eval"].append(logs_exp2["traj_count"][-1]) - pbar.set_description( - f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp2['evals'][-1]: 4.4f}" - ) - - # update policy weights - data_collector.update_policy_weights_() - - -############################################################################### -# TD(:math:`\lambda`) performs significantly better than TD(0) because it -# retrieves a much less biased estimate of the state-action value. -# -# .. code-block:: python -# -# plot(logs_exp2, "dqn_tdlambda.png") -# -# .. figure:: /_static/img/dqn_tdlambda.png -# :alt: Cart Pole results with TD(lambda) -# - - -print("shutting down") -data_collector.shutdown() -del data_collector - -############################################################################### -# Let's compare the results on a single plot. Because the TD(lambda) version -# works better, we'll have fewer episodes collected for a given number of -# frames (as there are more frames per episode). -# -# **Note**: As already mentioned above, to get a more reasonable performance, -# use a greater value for ``total_frames`` e.g. 500000. - - -def plot_both(): - frames_td0 = logs_exp1["frames"] - frames_tdlambda = logs_exp2["frames"] - evals_td0 = logs_exp1["evals"] - evals_tdlambda = logs_exp2["evals"] - mavgs_td0 = logs_exp1["mavgs"] - mavgs_tdlambda = logs_exp2["mavgs"] - traj_count_td0 = logs_exp1["traj_count_eval"] - traj_count_tdlambda = logs_exp2["traj_count_eval"] - - plt.figure(figsize=(15, 10)) - plt.subplot(1, 2, 1) - plt.plot(frames_td0[-len(evals_td0) :], evals_td0, label="return (td0)", alpha=0.5) - plt.plot( - frames_tdlambda[-len(evals_tdlambda) :], - evals_tdlambda, - label="return (td(lambda))", - alpha=0.5, - ) - plt.plot(frames_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") - plt.plot( - frames_tdlambda[-len(mavgs_tdlambda) :], - mavgs_tdlambda, - label="mavg (td(lambda))", - ) - plt.xlabel("frames collected") - plt.ylabel("trajectory length (= return)") - - plt.subplot(1, 2, 2) - plt.plot( - traj_count_td0[-len(evals_td0) :], - evals_td0, - label="return (td0)", - alpha=0.5, - ) - plt.plot( - traj_count_tdlambda[-len(evals_tdlambda) :], - evals_tdlambda, - label="return (td(lambda))", - alpha=0.5, - ) - plt.plot(traj_count_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") - plt.plot( - traj_count_tdlambda[-len(mavgs_tdlambda) :], - mavgs_tdlambda, - label="mavg (td(lambda))", - ) - plt.xlabel("trajectories collected") - plt.legend() - - plt.savefig("dqn.png") - - -############################################################################### -# .. code-block:: python -# -# plot_both() -# -# .. figure:: /_static/img/dqn.png -# :alt: Cart Pole results from the TD(:math:`lambda`) trained policy. -# -# Finally, we generate a new video to check what the algorithm has learnt. -# If all goes well, the duration should be significantly longer than with a -# random rollout. 
-# -# To get the raw pixels of the rollout, we insert a -# :class:`torchrl.envs.CatTensors` transform that precedes all others and copies -# the ``"pixels"`` key onto a ``"pixels_save"`` key. This is necessary because -# the other transforms that modify this key will update its value in-place in -# the output tensordict. -# - -test_env.transform.insert(0, CatTensors(["pixels"], "pixels_save", del_keys=False)) -eval_rollout = test_env.rollout(max_steps=10000, policy=actor, auto_reset=True).cpu() - -# sphinx_gallery_start_ignore -import imageio - -imageio.mimwrite("cartpole.gif", eval_rollout["pixels_save"].numpy(), fps=30) -# sphinx_gallery_end_ignore - -del test_env - -############################################################################### -# The video of the rollout can be saved using the imageio package: -# -# .. code-block:: -# -# import imageio -# imageio.mimwrite('cartpole.mp4', eval_rollout["pixels_save"].numpy(), fps=30); -# -# .. figure:: /_static/img/cartpole.gif -# :alt: Cart Pole results from the TD(:math:`\lambda`) trained policy. - -############################################################################### -# Conclusion and possible improvements -# ------------------------------------ -# -# In this tutorial we have learnt: -# -# - How to train a policy that read pixel-based states, what transforms to -# include and how to normalize the data; -# - How to create a policy that picks up the action with the highest value -# with :class:`torchrl.modules.QValueNetwork`; -# - How to build a multiprocessed data collector; -# - How to train a DQN with TD(:math:`\lambda`) returns. -# -# We have seen that using TD(:math:`\lambda`) greatly improved the performance -# of DQN. Other possible improvements could include: -# -# - Using the Multi-Step post-processing. Multi-step will project an action -# to the nth following step, and create a discounted sum of the rewards in -# between. This trick can make the algorithm noticebly less myopic. To use -# this, simply create the collector with -# -# from torchrl.data.postprocs.postprocs import MultiStep -# collector = CollectorClass(..., postproc=MultiStep(gamma, n)) -# -# where ``n`` is the number of looking-forward steps. Pay attention to the -# fact that the ``gamma`` factor has to be corrected by the number of -# steps till the next observation when being passed to -# ``vec_td_lambda_advantage_estimate``: -# -# gamma = gamma ** tensordict["steps_to_next_obs"] -# - A prioritized replay buffer could also be used. This will give a -# higher priority to samples that have the worst value accuracy. -# - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` -# for more information). -# - More fancy exploration techniques, such as NoisyLinear layers and such -# (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the -# ``MLP`` class used in our Dueling DQN). 
+ return trainer + + trainer = get_trainer() + trainer.train() + + # ############################################################################### + # # We represent the parameters and targets as flat structures, but unflattening + # # them is quite easy: + # + # params_flat = params.flatten_keys(".") + # + # ############################################################################### + # # We will be using the adam optimizer: + # + # optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) + # + # ############################################################################### + # # We create a test environment for evaluation of the policy: + # + # test_env = make_env( + # parallel=False, observation_norm_state_dict=observation_norm_state_dict + # ) + # # sanity check: + # print(actor_explore(test_env.reset())) + # + # ############################################################################### + # # Training loop of a regular DQN + # # ------------------------------ + # # + # # We'll start with a simple implementation of DQN where the returns are + # # computed without bootstrapping, i.e. + # # + # # .. math:: + # # + # # Q_{t}(s, a) = R(s, a) + \gamma * V_{t+1}(s) + # # + # # where :math:`Q(s, a)` is the Q-value of the current state-action pair, + # # :math:`R(s, a)` is the result of the reward function, and :math:`V(s)` is a + # # value function that returns 0 for terminating states. + # # + # # We store the logs in a defaultdict: + # + # logs_exp1 = defaultdict(list) + # prev_traj_count = 0 + # + # pbar = tqdm.tqdm(total=total_frames) + # for j, data in enumerate(data_collector): + # current_frames = data.numel() + # pbar.update(current_frames) + # data = data.view(-1) + # + # # We store the values on the replay buffer, after placing them on CPU. + # # When called for the first time, this will instantiate our storage + # # object which will print its content. 
+ # replay_buffer.extend(data.cpu()) + # + # # some logging + # if len(logs_exp1["frames"]): + # logs_exp1["frames"].append(current_frames + logs_exp1["frames"][-1]) + # else: + # logs_exp1["frames"].append(current_frames) + # + # if data["next", "done"].any(): + # done = data["next", "done"].squeeze(-1) + # logs_exp1["traj_lengths"].append( + # data["next", "step_count"][done].float().mean().item() + # ) + # + # # check that we have enough data to start training + # if sum(logs_exp1["frames"]) > init_random_frames: + # for _ in range(n_optim): + # # sample from the RB and send to device + # sampled_data = replay_buffer.sample(batch_size) + # sampled_data = sampled_data.to(device, non_blocking=True) + # + # # collect data from RB + # reward = sampled_data["next", "reward"].squeeze(-1) + # done = sampled_data["next", "done"].squeeze(-1).to(reward.dtype) + # action = sampled_data["action"].clone() + # + # # Compute action value (of the action actually taken) at time t + # # By default, TorchRL uses one-hot encodings for discrete actions + # sampled_data_out = sampled_data.select(*actor.in_keys) + # sampled_data_out = factor(sampled_data_out, params=params) + # action_value = sampled_data_out["action_value"] + # action_value = (action_value * action.to(action_value.dtype)).sum(-1) + # with torch.no_grad(): + # # compute best action value for the next step, using target parameters + # tdstep = step_mdp(sampled_data) + # next_value = factor( + # tdstep.select(*actor.in_keys), + # params=params_target, + # )["chosen_action_value"].squeeze(-1) + # exp_value = reward + gamma * next_value * (1 - done) + # assert exp_value.shape == action_value.shape + # # we use MSE loss but L1 or smooth L1 should also work + # error = nn.functional.mse_loss(exp_value, action_value).mean() + # error.backward() + # + # gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) + # + # optim.step() + # optim.zero_grad() + # + # # update of the target parameters + # params_target.apply( + # lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), + # params.detach(), + # inplace=True, + # ) + # + # actor_explore.step(current_frames) + # + # # Logging + # logs_exp1["grad_vals"].append(float(gv)) + # logs_exp1["losses"].append(error.item()) + # logs_exp1["values"].append(action_value.mean().item()) + # logs_exp1["traj_count"].append( + # prev_traj_count + data["next", "done"].sum().item() + # ) + # prev_traj_count = logs_exp1["traj_count"][-1] + # + # if j % 10 == 0: + # with set_exploration_mode("mode"), torch.no_grad(): + # # execute a rollout. 
The `set_exploration_mode("mode")` has no effect here since the policy is deterministic, but we add it for completeness + # eval_rollout = test_env.rollout( + # max_steps=10000, + # policy=actor, + # ).cpu() + # logs_exp1["traj_lengths_eval"].append(eval_rollout.shape[-1]) + # logs_exp1["evals"].append(eval_rollout["next", "reward"].sum().item()) + # if len(logs_exp1["mavgs"]): + # logs_exp1["mavgs"].append( + # logs_exp1["evals"][-1] * 0.05 + logs_exp1["mavgs"][-1] * 0.95 + # ) + # else: + # logs_exp1["mavgs"].append(logs_exp1["evals"][-1]) + # logs_exp1["traj_count_eval"].append(logs_exp1["traj_count"][-1]) + # pbar.set_description( + # f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp1['evals'][-1]: 4.4f}" + # ) + # + # # update policy weights + # data_collector.update_policy_weights_() + # + # ############################################################################### + # # We write a custom plot function to display the performance of our algorithm + # # + # + # + # def plot(logs, name): + # plt.figure(figsize=(15, 10)) + # plt.subplot(2, 3, 1) + # plt.plot( + # logs["frames"][-len(logs["evals"]) :], + # logs["evals"], + # label="return (eval)", + # ) + # plt.plot( + # logs["frames"][-len(logs["mavgs"]) :], + # logs["mavgs"], + # label="mavg of returns (eval)", + # ) + # plt.xlabel("frames collected") + # plt.ylabel("trajectory length (= return)") + # plt.subplot(2, 3, 2) + # plt.plot( + # logs["traj_count"][-len(logs["evals"]) :], + # logs["evals"], + # label="return", + # ) + # plt.plot( + # logs["traj_count"][-len(logs["mavgs"]) :], + # logs["mavgs"], + # label="mavg", + # ) + # plt.xlabel("trajectories collected") + # plt.legend() + # plt.subplot(2, 3, 3) + # plt.plot(logs["frames"][-len(logs["losses"]) :], logs["losses"]) + # plt.xlabel("frames collected") + # plt.title("loss") + # plt.subplot(2, 3, 4) + # plt.plot(logs["frames"][-len(logs["values"]) :], logs["values"]) + # plt.xlabel("frames collected") + # plt.title("value") + # plt.subplot(2, 3, 5) + # plt.plot( + # logs["frames"][-len(logs["grad_vals"]) :], + # logs["grad_vals"], + # ) + # plt.xlabel("frames collected") + # plt.title("grad norm") + # if len(logs["traj_lengths"]): + # plt.subplot(2, 3, 6) + # plt.plot(logs["traj_lengths"]) + # plt.xlabel("batches") + # plt.title("traj length (training)") + # plt.savefig(name) + # if is_notebook(): + # plt.show() + # + # + # ############################################################################### + # # The performance of the policy can be measured as the length of trajectories. + # # As we can see on the results of the :func:`plot` function, the performance + # # of the policy increases, albeit slowly. + # # + # # .. code-block:: python + # # + # # plot(logs_exp1, "dqn_td0.png") + # # + # # .. figure:: /_static/img/dqn_td0.png + # # :alt: Cart Pole results with TD(0) + # # + # + # print("shutting down") + # data_collector.shutdown() + # del data_collector + # + # ############################################################################### + # # DQN with TD(:math:`\lambda`) + # # ---------------------------- + # # + # # We can improve the above algorithm by getting a better estimate of the + # # return, using not only the next state value but the whole sequence of rewards + # # and values that follow a particular step. + # # + # # TorchRL provides a vectorized version of TD(lambda) named + # # :func:`torchrl.objectives.value.functional.vec_td_lambda_advantage_estimate`. 
+ # # We'll use this to obtain a target value that the value network will be + # # trained to match. + # # + # # The big difference in this implementation is that we'll store entire + # # trajectories and not single steps in the replay buffer. This will be done + # # automatically as long as we're not "flattening" the tensordict collected: + # # by keeping a shape ``[Batch x timesteps]`` and giving this + # # to the RB, we'll be creating a replay buffer of size + # # ``[Capacity x timesteps]``. + # + # + # from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate + # + # ############################################################################### + # # We reset the actor parameters: + # # + # + # ( + # factor, + # actor, + # actor_explore, + # params, + # params_target, + # ) = make_model(test_env) + # params_flat = params.flatten_keys(".") + # + # optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) + # test_env = make_env( + # parallel=False, observation_norm_state_dict=observation_norm_state_dict + # ) + # print(actor_explore(test_env.reset())) + # + # ############################################################################### + # # Data: Replay buffer and collector + # # --------------------------------- + # # + # # We need to build a new replay buffer of the appropriate size: + # # + # + # max_size = frames_per_batch // num_workers + # + # replay_buffer = TensorDictReplayBuffer( + # storage=LazyMemmapStorage(-(-buffer_size // max_size)), + # prefetch=n_optim, + # ) + # + # data_collector = MultiaSyncDataCollector( + # [ + # make_env( + # parallel=True, observation_norm_state_dict=observation_norm_state_dict + # ), + # ] + # * num_collectors, + # policy=actor_explore, + # frames_per_batch=frames_per_batch, + # total_frames=total_frames, + # exploration_mode="random", + # devices=[device] * num_collectors, + # storing_devices=[device] * num_collectors, + # # devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], + # # storing_devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], + # split_trajs=False, + # ) + # + # + # logs_exp2 = defaultdict(list) + # prev_traj_count = 0 + # + # ############################################################################### + # # Training loop + # # ------------- + # # + # # There are very few differences with the training loop above: + # # + # # - The tensordict received by the collector is used as-is, without being + # # flattened (recall the ``data.view(-1)`` above), to keep the temporal + # # relation between consecutive steps. + # # - We use :func:`vec_td_lambda_advantage_estimate` to compute the target + # # value. 
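###############################################################################
# Before the (commented-out) training loop below, here is a small standalone
# sketch of how ``vec_td_lambda_advantage_estimate`` is called, using dummy
# tensors rather than data from the tutorial. Values, next values, rewards
# and done flags are shaped ``[batch, time, 1]``, matching the
# trajectory-wise storage described above; the done flags are cast to the
# reward dtype as in the loop below.

import torch
from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate

B, T = 2, 5  # two dummy trajectories of five steps each
value = torch.randn(B, T, 1)
next_value = torch.randn(B, T, 1)
reward = torch.randn(B, T, 1)
done = torch.zeros(B, T, 1)  # no terminal step in this toy batch

advantage = vec_td_lambda_advantage_estimate(
    0.99, 0.95, value, next_value, reward, done
)
print(advantage.shape)  # expected: torch.Size([2, 5, 1])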
+ # + # pbar = tqdm.tqdm(total=total_frames) + # for j, data in enumerate(data_collector): + # current_frames = data.numel() + # pbar.update(current_frames) + # + # replay_buffer.extend(data.cpu()) + # if len(logs_exp2["frames"]): + # logs_exp2["frames"].append(current_frames + logs_exp2["frames"][-1]) + # else: + # logs_exp2["frames"].append(current_frames) + # + # if data["next", "done"].any(): + # done = data["next", "done"].squeeze(-1) + # logs_exp2["traj_lengths"].append( + # data["next", "step_count"][done].float().mean().item() + # ) + # + # if sum(logs_exp2["frames"]) > init_random_frames: + # for _ in range(n_optim): + # sampled_data = replay_buffer.sample(batch_size // max_size) + # sampled_data = sampled_data.clone().to(device, non_blocking=True) + # + # reward = sampled_data["next", "reward"] + # done = sampled_data["next", "done"].to(reward.dtype) + # action = sampled_data["action"].clone() + # + # sampled_data_out = sampled_data.select(*actor.in_keys) + # sampled_data_out = vmap(factor, (0, None))(sampled_data_out, params) + # action_value = sampled_data_out["action_value"] + # action_value = (action_value * action.to(action_value.dtype)).sum(-1, True) + # with torch.no_grad(): + # tdstep = step_mdp(sampled_data) + # next_value = vmap(factor, (0, None))( + # tdstep.select(*actor.in_keys), params + # ) + # next_value = next_value["chosen_action_value"] + # error = vec_td_lambda_advantage_estimate( + # gamma, + # lmbda, + # action_value, + # next_value, + # reward, + # done, + # ).pow(2) + # error = error.mean() + # error.backward() + # + # gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) + # + # optim.step() + # optim.zero_grad() + # + # # update of the target parameters + # params_target.apply( + # lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), + # params.detach(), + # inplace=True, + # ) + # + # actor_explore.step(current_frames) + # + # # Logging + # logs_exp2["grad_vals"].append(float(gv)) + # + # logs_exp2["losses"].append(error.item()) + # logs_exp2["values"].append(action_value.mean().item()) + # logs_exp2["traj_count"].append( + # prev_traj_count + data["next", "done"].sum().item() + # ) + # prev_traj_count = logs_exp2["traj_count"][-1] + # if j % 10 == 0: + # with set_exploration_mode("mode"), torch.no_grad(): + # # execute a rollout. The `set_exploration_mode("mode")` has + # # no effect here since the policy is deterministic, but we add + # # it for completeness + # eval_rollout = test_env.rollout( + # max_steps=10000, + # policy=actor, + # ).cpu() + # logs_exp2["traj_lengths_eval"].append(eval_rollout.shape[-1]) + # logs_exp2["evals"].append(eval_rollout["next", "reward"].sum().item()) + # if len(logs_exp2["mavgs"]): + # logs_exp2["mavgs"].append( + # logs_exp2["evals"][-1] * 0.05 + logs_exp2["mavgs"][-1] * 0.95 + # ) + # else: + # logs_exp2["mavgs"].append(logs_exp2["evals"][-1]) + # logs_exp2["traj_count_eval"].append(logs_exp2["traj_count"][-1]) + # pbar.set_description( + # f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp2['evals'][-1]: 4.4f}" + # ) + # + # # update policy weights + # data_collector.update_policy_weights_() + # + # + # ############################################################################### + # # TD(:math:`\lambda`) performs significantly better than TD(0) because it + # # retrieves a much less biased estimate of the state-action value. + # # + # # .. code-block:: python + # # + # # plot(logs_exp2, "dqn_tdlambda.png") + # # + # # .. 
figure:: /_static/img/dqn_tdlambda.png + # # :alt: Cart Pole results with TD(lambda) + # # + # + # + # print("shutting down") + # data_collector.shutdown() + # del data_collector + # + # ############################################################################### + # # Let's compare the results on a single plot. Because the TD(lambda) version + # # works better, we'll have fewer episodes collected for a given number of + # # frames (as there are more frames per episode). + # # + # # **Note**: As already mentioned above, to get a more reasonable performance, + # # use a greater value for ``total_frames`` e.g. 500000. + # + # + # def plot_both(): + # frames_td0 = logs_exp1["frames"] + # frames_tdlambda = logs_exp2["frames"] + # evals_td0 = logs_exp1["evals"] + # evals_tdlambda = logs_exp2["evals"] + # mavgs_td0 = logs_exp1["mavgs"] + # mavgs_tdlambda = logs_exp2["mavgs"] + # traj_count_td0 = logs_exp1["traj_count_eval"] + # traj_count_tdlambda = logs_exp2["traj_count_eval"] + # + # plt.figure(figsize=(15, 10)) + # plt.subplot(1, 2, 1) + # plt.plot(frames_td0[-len(evals_td0) :], evals_td0, label="return (td0)", alpha=0.5) + # plt.plot( + # frames_tdlambda[-len(evals_tdlambda) :], + # evals_tdlambda, + # label="return (td(lambda))", + # alpha=0.5, + # ) + # plt.plot(frames_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") + # plt.plot( + # frames_tdlambda[-len(mavgs_tdlambda) :], + # mavgs_tdlambda, + # label="mavg (td(lambda))", + # ) + # plt.xlabel("frames collected") + # plt.ylabel("trajectory length (= return)") + # + # plt.subplot(1, 2, 2) + # plt.plot( + # traj_count_td0[-len(evals_td0) :], + # evals_td0, + # label="return (td0)", + # alpha=0.5, + # ) + # plt.plot( + # traj_count_tdlambda[-len(evals_tdlambda) :], + # evals_tdlambda, + # label="return (td(lambda))", + # alpha=0.5, + # ) + # plt.plot(traj_count_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") + # plt.plot( + # traj_count_tdlambda[-len(mavgs_tdlambda) :], + # mavgs_tdlambda, + # label="mavg (td(lambda))", + # ) + # plt.xlabel("trajectories collected") + # plt.legend() + # + # plt.savefig("dqn.png") + # + # + # ############################################################################### + # # .. code-block:: python + # # + # # plot_both() + # # + # # .. figure:: /_static/img/dqn.png + # # :alt: Cart Pole results from the TD(:math:`lambda`) trained policy. + # # + # # Finally, we generate a new video to check what the algorithm has learnt. + # # If all goes well, the duration should be significantly longer than with a + # # random rollout. + # # + # # To get the raw pixels of the rollout, we insert a + # # :class:`torchrl.envs.CatTensors` transform that precedes all others and copies + # # the ``"pixels"`` key onto a ``"pixels_save"`` key. This is necessary because + # # the other transforms that modify this key will update its value in-place in + # # the output tensordict. + # # + # + # test_env.transform.insert(0, CatTensors(["pixels"], "pixels_save", del_keys=False)) + # eval_rollout = test_env.rollout(max_steps=10000, policy=actor, auto_reset=True).cpu() + # + # # sphinx_gallery_start_ignore + # import imageio + # + # imageio.mimwrite("cartpole.gif", eval_rollout["pixels_save"].numpy(), fps=30) + # # sphinx_gallery_end_ignore + # + # del test_env + # + # ############################################################################### + # # The video of the rollout can be saved using the imageio package: + # # + # # .. 
code-block:: + # # + # # import imageio + # # imageio.mimwrite('cartpole.mp4', eval_rollout["pixels_save"].numpy(), fps=30); + # # + # # .. figure:: /_static/img/cartpole.gif + # # :alt: Cart Pole results from the TD(:math:`\lambda`) trained policy. + # + # ############################################################################### + # # Conclusion and possible improvements + # # ------------------------------------ + # # + # # In this tutorial we have learnt: + # # + # # - How to train a policy that read pixel-based states, what transforms to + # # include and how to normalize the data; + # # - How to create a policy that picks up the action with the highest value + # # with :class:`torchrl.modules.QValueNetwork`; + # # - How to build a multiprocessed data collector; + # # - How to train a DQN with TD(:math:`\lambda`) returns. + # # + # # We have seen that using TD(:math:`\lambda`) greatly improved the performance + # # of DQN. Other possible improvements could include: + # # + # # - Using the Multi-Step post-processing. Multi-step will project an action + # # to the nth following step, and create a discounted sum of the rewards in + # # between. This trick can make the algorithm noticebly less myopic. To use + # # this, simply create the collector with + # # + # # from torchrl.data.postprocs.postprocs import MultiStep + # # collector = CollectorClass(..., postproc=MultiStep(gamma, n)) + # # + # # where ``n`` is the number of looking-forward steps. Pay attention to the + # # fact that the ``gamma`` factor has to be corrected by the number of + # # steps till the next observation when being passed to + # # ``vec_td_lambda_advantage_estimate``: + # # + # # gamma = gamma ** tensordict["steps_to_next_obs"] + # # - A prioritized replay buffer could also be used. This will give a + # # higher priority to samples that have the worst value accuracy. + # # - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` + # # for more information). + # # - More fancy exploration techniques, such as NoisyLinear layers and such + # # (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the + # # ``MLP`` class used in our Dueling DQN). From bfef8eefae9f7578c79a0ce90760d5de510d244e Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 21 Mar 2023 14:12:46 +0000 Subject: [PATCH 07/89] dqn --- torchrl/trainers/trainers.py | 15 +-- tutorials/sphinx-tutorials/coding_dqn.py | 147 ++++++++++++++--------- 2 files changed, 96 insertions(+), 66 deletions(-) diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index b6c53b03ab7..0f7aa74cd0f 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -95,17 +95,16 @@ class Trainer: optimizer (optim.Optimizer): An optimizer that trains the parameters of the model. logger (Logger, optional): a Logger that will handle the logging. - optim_steps_per_batch (int, optional): number of optimization steps + optim_steps_per_batch (int): number of optimization steps per collection of data. An trainer works as follows: a main loop collects batches of data (epoch loop), and a sub-loop (training loop) performs model updates in between two collections of data. - Default is 500 clip_grad_norm (bool, optional): If True, the gradients will be clipped based on the total norm of the model parameters. If False, all the partial derivatives will be clamped to (-clip_norm, clip_norm). Default is :obj:`True`. clip_norm (Number, optional): value to be used for clipping gradients. - Default is 100.0. 
+ Default is None (no clip norm). progress_bar (bool, optional): If True, a progress bar will be displayed using tqdm. If tqdm is not installed, this option won't have any effect. Default is :obj:`True` @@ -131,15 +130,16 @@ def __new__(cls, *args, **kwargs): def __init__( self, + *, collector: _DataCollector, total_frames: int, frame_skip: int, + optim_steps_per_batch: int, loss_module: Union[LossModule, Callable[[TensorDictBase], TensorDictBase]], optimizer: Optional[optim.Optimizer] = None, logger: Optional[Logger] = None, - optim_steps_per_batch: int = 500, clip_grad_norm: bool = True, - clip_norm: float = 100.0, + clip_norm: float = None, progress_bar: bool = True, seed: int = 42, save_trainer_interval: int = 10000, @@ -726,11 +726,12 @@ def _grad_clip(self, clip_grad_norm: bool, clip_norm: float) -> float: for param_group in self.optimizer.param_groups: params += param_group["params"] - if clip_grad_norm: + if clip_grad_norm and clip_norm is not None: gn = nn.utils.clip_grad_norm_(params, clip_norm) else: gn = sum([p.grad.pow(2).sum() for p in params if p.grad is not None]).sqrt() - nn.utils.clip_grad_value_(params, clip_norm) + if clip_norm is not None: + nn.utils.clip_grad_value_(params, clip_norm) return float(gn) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index cc6bc232513..9ad17095ee3 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -79,20 +79,14 @@ if __name__ == "__main__": # sphinx_gallery_start_ignore import warnings - from collections import defaultdict - from torchrl.objectives import DQNLoss - from torchrl.trainers import Trainer + from torchrl.objectives import DQNLoss, SoftUpdate + from torchrl.trainers import Trainer, ReplayBufferTrainer, UpdateWeights warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore import torch - import tqdm - from functorch import vmap - from matplotlib import pyplot as plt - from tensordict import TensorDict - from tensordict.nn import get_functional from torch import nn from torchrl.collectors import MultiaSyncDataCollector from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer @@ -100,7 +94,6 @@ from torchrl.envs.libs.gym import GymEnv from torchrl.envs.transforms import ( CatFrames, - CatTensors, Compose, GrayScale, ObservationNorm, @@ -108,7 +101,6 @@ ToTensorImage, TransformedEnv, ) - from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor @@ -137,11 +129,12 @@ def is_notebook() -> bool: # of vectorization of the operations on device, although this would # technically work with every single environment attached to its own set of # transforms. - # - ``observation_norm_state_dict`` will contain the normalizing constants for - # the :class:`torchrl.envs.ObservationNorm` tranform. + # - ``obs_norm_sd`` will contain the normalizing constants for + # the :class:`torchrl.envs.ObservationNorm` transform. 
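###############################################################################
# As a hedged, hypothetical example of what can be passed through
# ``obs_norm_sd``: a per-channel location and scale of shape ``[C, 1, 1]``.
# The values below are placeholders; the real constants are computed further
# down with :meth:`torchrl.envs.ObservationNorm.init_stats`.

import torch

dummy_obs_norm_sd = {
    "loc": torch.zeros(4, 1, 1),
    "scale": torch.ones(4, 1, 1),
    "standard_normal": True,
}
# env = make_env(parallel=False, obs_norm_sd=dummy_obs_norm_sd)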
# # We will be using five transforms: # + # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; # - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 # tensor in a floating point tensor in the ``[0, 1]`` space with shape # ``[C, W, H]``; @@ -159,21 +152,21 @@ def is_notebook() -> bool: # - def make_env(parallel=False, observation_norm_state_dict=None, frame_skip=1): - if observation_norm_state_dict is None: - observation_norm_state_dict = {"standard_normal": True} + def make_env(parallel=False, obs_norm_sd=None, ): + if obs_norm_sd is None: + obs_norm_sd = {"standard_normal": True} if parallel: base_env = ParallelEnv( num_workers, EnvCreator( lambda: GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device, frame_skip=frame_skip + "CartPole-v1", from_pixels=True, pixels_only=True, device=device, ) ), ) else: base_env = GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device, frame_skip=frame_skip, + "CartPole-v1", from_pixels=True, pixels_only=True, device=device, ) env = TransformedEnv( @@ -185,7 +178,7 @@ def make_env(parallel=False, observation_norm_state_dict=None, frame_skip=1): GrayScale(), Resize(64, 64), CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **observation_norm_state_dict), + ObservationNorm(in_keys=["pixels"], **obs_norm_sd), ), ) return env @@ -197,21 +190,23 @@ def make_env(parallel=False, observation_norm_state_dict=None, frame_skip=1): # # To normalize images, we don't want to normalize each pixel independently # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` - # shaped loc and scale parameters. We will be using the ``reduce_dim`` argument + # shaped set of normalizing constants (loc and scale parameters). + # We will be using the ``reduce_dim`` argument # of :func:`torchrl.envs.ObservationNorm.init_stats` to instruct which # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that # not all dimensions disappear in the process: + # def get_norm_stats(): test_env = make_env() test_env.transform[-1].init_stats( num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) ) - observation_norm_state_dict = test_env.transform[-1].state_dict() + obs_norm_sd = test_env.transform[-1].state_dict() # let's check that normalizing constants have a size of ``[C, 1, 1]`` where # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). - print(observation_norm_state_dict) - return observation_norm_state_dict + print(obs_norm_sd) + return obs_norm_sd ############################################################################### # Building the model (Deep Q-network) @@ -229,21 +224,11 @@ def get_norm_stats(): # where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a # :math:`\# obs \rightarrow num_actions` function. # - # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, which will read the state-action + # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, + # which will read the state-action # values, pick up the one with the maximum value and write all those results # in the input :class:`tensordict.TensorDict`. # - # Target parameters - # ~~~~~~~~~~~~~~~~~ - # - # Many off-policy RL algorithms use the concept of "target parameters" when it - # comes to estimate the value of the ``t+1`` state or state-action pair. - # The target parameters are lagged copies of the model parameters. 
Because - # their predictions mismatch those of the current model configuration, they - # help learning by putting a pessimistic bound on the value being estimated. - # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous - # in similar algorithms. - # def make_model(dummy_env): cnn_kwargs = { @@ -344,11 +329,11 @@ def get_replay_buffer(buffer_size, n_optim): # out training loop must account for. For simplicity, we set the devices to # the same value for all sub-collectors. - def get_collector(observation_norm_state_dict, num_collectors, actor_explore, frames_per_batch, total_frames, device): + def get_collector(obs_norm_sd, num_collectors, actor_explore, frames_per_batch, total_frames, device): data_collector = MultiaSyncDataCollector( [ make_env( - parallel=True, observation_norm_state_dict=observation_norm_state_dict + parallel=True, obs_norm_sd=obs_norm_sd ), ] * num_collectors, @@ -365,8 +350,29 @@ def get_collector(observation_norm_state_dict, num_collectors, actor_explore, fr ) return data_collector + ############################################################################### + # Loss function + # ------------- + # + # Building our loss function is straightforward: we only need to provide + # the model and a bunch of hyperparameters to the DQNLoss class. + # + # Target parameters + # ~~~~~~~~~~~~~~~~~ + # + # Many off-policy RL algorithms use the concept of "target parameters" when it + # comes to estimate the value of the next state or state-action pair. + # The target parameters are lagged copies of the model parameters. Because + # their predictions mismatch those of the current model configuration, they + # help learning by putting a pessimistic bound on the value being estimated. + # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous + # in similar algorithms. + # - + def get_loss_module(actor, gamma): + loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) + target_updater = SoftUpdate(loss_module) + return loss_module, target_updater ############################################################################### # Hyperparameters @@ -469,26 +475,49 @@ def get_collector(observation_norm_state_dict, num_collectors, actor_explore, fr # value e.g. 500000 # - def get_trainer(): - stats = get_norm_stats() - test_env = make_env(parallel=False, observation_norm_state_dict=stats) - # Get model - actor, actor_explore = make_model(test_env) - loss_module = DQNLoss(actor, gamma=0.99) - collector = get_collector(stats, num_collectors, actor_explore, frames_per_batch, total_frames, device) - optimizer = torch.optim.Adam(loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas) - trainer = Trainer( - collector=collector, - total_frames=total_frames, - frame_skip=1, - loss_module=loss_module, - optimizer=optimizer, - logger=None, - optim_steps_per_batch = n_optim, - ) - return trainer + ############################################################################### + # Building a Trainer + # ------------------ + # + # TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the + # following keyword-only arguments: + # + # - ``collector`` + # - ``loss_module`` + # - ``optimizer`` + # - ``logger``: A logger can be + # - ``total_frames``: this parameter defines the lifespan of the trainer. + # - ``frame_skip``: when a frame-skip is used, the collector must be made + # aware of it in order to accurately count the number of frames + # collected etc. 
Making the trainer aware of this parameter is not + # mandatory but helps to have a fairer comparison between settings where + # the total number of frames (budget) is fixed but the frame-skip is + # variable. + + stats = get_norm_stats() + test_env = make_env(parallel=False, obs_norm_sd=stats) + # Get model + actor, actor_explore = make_model(test_env) + loss_module, target_net_updater = get_loss_module(actor, gamma) + collector = get_collector(stats, num_collectors, actor_explore, frames_per_batch, total_frames, device) + optimizer = torch.optim.Adam(loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas) + trainer = Trainer( + collector=collector, + total_frames=total_frames, + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=None, + optim_steps_per_batch = n_optim, + ) + + buffer_hook = ReplayBufferTrainer(get_replay_buffer(buffer_size, n_optim)) + buffer_hook.register(trainer) + weight_updater = UpdateWeights(collector, update_weights_interval=1) + weight_updater.register(trainer) + + trainer.register_op("post_optim", target_net_updater.step) - trainer = get_trainer() trainer.train() # ############################################################################### @@ -506,7 +535,7 @@ def get_trainer(): # # We create a test environment for evaluation of the policy: # # test_env = make_env( - # parallel=False, observation_norm_state_dict=observation_norm_state_dict + # parallel=False, obs_norm_sd=obs_norm_sd # ) # # sanity check: # print(actor_explore(test_env.reset())) @@ -744,7 +773,7 @@ def get_trainer(): # # optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) # test_env = make_env( - # parallel=False, observation_norm_state_dict=observation_norm_state_dict + # parallel=False, obs_norm_sd=obs_norm_sd # ) # print(actor_explore(test_env.reset())) # @@ -765,7 +794,7 @@ def get_trainer(): # data_collector = MultiaSyncDataCollector( # [ # make_env( - # parallel=True, observation_norm_state_dict=observation_norm_state_dict + # parallel=True, obs_norm_sd=obs_norm_sd # ), # ] # * num_collectors, From 972217a2135c0c63e871f53ee6445991785af5a9 Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 23 Mar 2023 15:48:57 +0000 Subject: [PATCH 08/89] amend --- docs/source/reference/data.rst | 9 +- torchrl/data/__init__.py | 1 + tutorials/sphinx-tutorials/coding_dqn.py | 1933 +++++++++++----------- 3 files changed, 987 insertions(+), 956 deletions(-) diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst index c115514650c..72b66b3ab1e 100644 --- a/docs/source/reference/data.rst +++ b/docs/source/reference/data.rst @@ -24,11 +24,12 @@ Composable Replay Buffers We also give users the ability to compose a replay buffer using the following components: +.. currentmodule:: torchrl.data.replay_buffers + .. autosummary:: :toctree: generated/ :template: rl_template.rst - .. currentmodule:: torchrl.data.replay_buffers Sampler PrioritizedSampler @@ -176,11 +177,12 @@ Here's an example: `the repository `_ is needed as the latest wheels are not published on PyPI. +.. currentmodule:: torchrl.data.datasets + .. autosummary:: :toctree: generated/ :template: rl_template.rst - .. currentmodule:: torchrl.data.datasets D4RLExperienceReplay @@ -193,6 +195,7 @@ It is important that your environment specs match the input and output that it s :obj:`ParallelEnv` will create buffers from these specs to communicate with the spawn processes. Check the :obj:`torchrl.envs.utils.check_env_specs` method for a sanity check. +.. currentmodule:: torchrl.data .. 
autosummary:: :toctree: generated/ @@ -213,6 +216,8 @@ Check the :obj:`torchrl.envs.utils.check_env_specs` method for a sanity check. Utils ----- +.. currentmodule:: torchrl.data.datasets + .. autosummary:: :toctree: generated/ :template: rl_template.rst diff --git a/torchrl/data/__init__.py b/torchrl/data/__init__.py index 788a2cce27d..6608b49cade 100644 --- a/torchrl/data/__init__.py +++ b/torchrl/data/__init__.py @@ -30,3 +30,4 @@ UnboundedContinuousTensorSpec, UnboundedDiscreteTensorSpec, ) +from . import datasets diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 9ad17095ee3..4124d87a492 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -23,15 +23,21 @@ # module, replay buffer and optimizer. # - Adding hooks to a trainer, such as loggers, target network updaters and such. # -# We will also focus on some other aspects of the library: +# The trainer is fully customisable and offers a large set of functionalities. +# The tutorial is organised around its construction. +# We will be detailing how to build each of the components of the library first, +# and then put the pieces together using the `torchrl.trainers.Trainer` class. +# +# Along the road, we will also focus on some other aspects of the library: # # - how to build an environment in TorchRL, including transforms (e.g. data # normalization, frame concatenation, resizing and turning to grayscale) # and parallel execution. Unlike what we did in the # `DDPG tutorial `_, we # will normalize the pixels and not the state vector. -# - how to design a ``QValueActor``, i.e. an actor that estimates the action -# values and picks up the action with the highest estimated return; +# - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor +# that estimates the action values and picks up the action with the highest +# estimated return; # - how to collect data from your environment efficiently and store them # in a replay buffer; # - how to store trajectories (and not transitions) in your replay buffer), @@ -76,1005 +82,1024 @@ # to provide a high-level illustration of TorchRL features in the context # of this algorithm. 
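###############################################################################
# As a preview of the hook mechanism mentioned in the key learnings above,
# the sketch below registers a custom callable on the trainer. The
# ``count_optim_steps`` helper is ours (not a TorchRL component), and the
# registration line is kept commented out because the ``trainer`` instance is
# only built later in this tutorial.

optim_step_counter = {"count": 0}


def count_optim_steps():
    # a real hook could log metrics, anneal hyperparameters, etc.
    optim_step_counter["count"] += 1


# trainer.register_op("post_optim", count_optim_steps)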
-if __name__ == "__main__": - # sphinx_gallery_start_ignore - import warnings - - from torchrl.objectives import DQNLoss, SoftUpdate - from torchrl.trainers import Trainer, ReplayBufferTrainer, UpdateWeights +# sphinx_gallery_start_ignore +import warnings - warnings.filterwarnings("ignore") - # sphinx_gallery_end_ignore +from torchrl.objectives import DQNLoss, SoftUpdate +from torchrl.trainers import Trainer, ReplayBufferTrainer, UpdateWeights - import torch - from torch import nn - from torchrl.collectors import MultiaSyncDataCollector - from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer - from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter - from torchrl.envs.libs.gym import GymEnv - from torchrl.envs.transforms import ( - CatFrames, - Compose, - GrayScale, - ObservationNorm, - Resize, - ToTensorImage, - TransformedEnv, - ) - from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor +warnings.filterwarnings("ignore") +# sphinx_gallery_end_ignore +import torch +from torch import nn +from torchrl.collectors import MultiaSyncDataCollector +from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer +from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter +from torchrl.envs.libs.gym import GymEnv +from torchrl.envs.transforms import ( + CatFrames, + Compose, + GrayScale, + ObservationNorm, + Resize, + ToTensorImage, + TransformedEnv, +) +from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor - def is_notebook() -> bool: - try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython - else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter +def is_notebook() -> bool: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return False # Terminal running IPython + else: + return False # Other type (?) + except NameError: + return False # Probably standard Python interpreter - ############################################################################### - # Building the environment - # ------------------------ - # - # Our environment builder has two arguments: - # - # - ``parallel``: determines whether multiple environments have to be run in - # parallel. We stack the transforms after the - # :class:`torchrl.envs.ParallelEnv` to take advantage - # of vectorization of the operations on device, although this would - # technically work with every single environment attached to its own set of - # transforms. - # - ``obs_norm_sd`` will contain the normalizing constants for - # the :class:`torchrl.envs.ObservationNorm` transform. 
- # - # We will be using five transforms: - # - # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; - # - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 - # tensor in a floating point tensor in the ``[0, 1]`` space with shape - # ``[C, W, H]``; - # - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; - # - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; - # - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; - # - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of - # successive frames (``N=4``) in a single tensor along the channel dimension. - # This is useful as a single image does not carry information about the - # motion of the cartpole. Some memory about past observations and actions - # is needed, either via a recurrent neural network or using a stack of - # frames. - # - :class:`torchrl.envs.ObservationNorm` which will normalize our observations - # given some custom summary statistics. - # +############################################################################### +# Let's get started with the various pieces we need for our algorithm: +# +# - An environment; +# - A policy (and related modules that we group under the "model" umbrella); +# - A data collector, which makes the policy play in the environment and +# delivers training data; +# - A replay buffer to store the training data; +# - A loss module, which computes the objective function to train our policy +# to maximise the return; +# - An optimizer, which performs parameter updates based on our loss. +# +# Additional modules include a logger, a recorder (executes the policy in +# "eval" mode) and a target network updater. With all these components into +# place, it is easy to see how one could misplace or misuse one component in +# the training script. The trainer is there to orchestrate everything for you! +# +# Building the environment +# ------------------------ +# +# First let's write a helper function that will output an environment. As usual, +# the "raw" environment may be too simple to be used in practice and we'll need +# some data transformation to expose its output to the policy. +# +# We will be using five transforms: +# +# - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; +# - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 +# tensor in a floating point tensor in the ``[0, 1]`` space with shape +# ``[C, W, H]``; +# - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; +# - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; +# - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; +# - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of +# successive frames (``N=4``) in a single tensor along the channel dimension. +# This is useful as a single image does not carry information about the +# motion of the cartpole. Some memory about past observations and actions +# is needed, either via a recurrent neural network or using a stack of +# frames. +# - :class:`torchrl.envs.ObservationNorm` which will normalize our observations +# given some custom summary statistics. +# +# In practice, our environment builder has two arguments: +# +# - ``parallel``: determines whether multiple environments have to be run in +# parallel. 
We stack the transforms after the +# :class:`torchrl.envs.ParallelEnv` to take advantage +# of vectorization of the operations on device, although this would +# technically work with every single environment attached to its own set of +# transforms. +# - ``obs_norm_sd`` will contain the normalizing constants for +# the :class:`torchrl.envs.ObservationNorm` transform. +# - def make_env(parallel=False, obs_norm_sd=None, ): - if obs_norm_sd is None: - obs_norm_sd = {"standard_normal": True} - if parallel: - base_env = ParallelEnv( - num_workers, - EnvCreator( - lambda: GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device, - ) - ), - ) - else: - base_env = GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device, - ) - env = TransformedEnv( - base_env, - Compose( - StepCounter(), # to count the steps of each trajectory - ToTensorImage(), - RewardScaling(loc=0.0, scale=0.1), - GrayScale(), - Resize(64, 64), - CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **obs_norm_sd), +def make_env(parallel=False, obs_norm_sd=None, ): + if obs_norm_sd is None: + obs_norm_sd = {"standard_normal": True} + if parallel: + base_env = ParallelEnv( + num_workers, + EnvCreator( + lambda: GymEnv( + "CartPole-v1", from_pixels=True, pixels_only=True, device=device, + ) ), ) - return env + else: + base_env = GymEnv( + "CartPole-v1", from_pixels=True, pixels_only=True, device=device, + ) + env = TransformedEnv( + base_env, + Compose( + StepCounter(), # to count the steps of each trajectory + ToTensorImage(), + RewardScaling(loc=0.0, scale=0.1), + GrayScale(), + Resize(64, 64), + CatFrames(4, in_keys=["pixels"], dim=-3), + ObservationNorm(in_keys=["pixels"], **obs_norm_sd), + ), + ) + return env - ############################################################################### - # Compute normalizing constants - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # To normalize images, we don't want to normalize each pixel independently - # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` - # shaped set of normalizing constants (loc and scale parameters). - # We will be using the ``reduce_dim`` argument - # of :func:`torchrl.envs.ObservationNorm.init_stats` to instruct which - # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that - # not all dimensions disappear in the process: - # - def get_norm_stats(): - test_env = make_env() - test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) - ) - obs_norm_sd = test_env.transform[-1].state_dict() - # let's check that normalizing constants have a size of ``[C, 1, 1]`` where - # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). - print(obs_norm_sd) - return obs_norm_sd +############################################################################### +# Compute normalizing constants +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# To normalize images, we don't want to normalize each pixel independently +# with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` +# shaped set of normalizing constants (loc and scale parameters). 
+# We will be using the ``reduce_dim`` argument +# of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which +# dimensions must be reduced, and the ``keep_dims`` parameter to ensure that +# not all dimensions disappear in the process: +# - ############################################################################### - # Building the model (Deep Q-network) - # ----------------------------------- - # - # The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` - # object which is a simple CNN followed by a two-layer MLP. The only trick used - # here is that the action values (i.e. left and right action value) are - # computed using - # - # .. math:: - # - # val = b(obs) + v(obs) - \mathbb{E}[v(obs)] - # - # where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a - # :math:`\# obs \rightarrow num_actions` function. - # - # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, - # which will read the state-action - # values, pick up the one with the maximum value and write all those results - # in the input :class:`tensordict.TensorDict`. - # +def get_norm_stats(): + test_env = make_env() + test_env.transform[-1].init_stats( + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) +) + obs_norm_sd = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). + print(obs_norm_sd) + return obs_norm_sd - def make_model(dummy_env): - cnn_kwargs = { - "num_cells": [32, 64, 64], - "kernel_sizes": [6, 4, 3], - "strides": [2, 2, 1], - "activation_class": nn.ELU, - # This can be used to reduce the size of the last layer of the CNN - # "squeeze_output": True, - # "aggregator_class": nn.AdaptiveAvgPool2d, - # "aggregator_kwargs": {"output_size": (1, 1)}, - } - mlp_kwargs = { - "depth": 2, - "num_cells": [ - 64, - 64, - ], - "activation_class": nn.ELU, - } - net = DuelingCnnDQNet( - dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs - ).to(device) - net.value[-1].bias.data.fill_(init_bias) +############################################################################### +# Building the model (Deep Q-network) +# ----------------------------------- +# +# The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` +# object which is a simple CNN followed by a two-layer MLP. The only trick used +# here is that the action values (i.e. left and right action value) are +# computed using +# +# .. math:: +# +# val = b(obs) + v(obs) - \mathbb{E}[v(obs)] +# +# where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a +# :math:`\# obs \rightarrow num_actions` function. +# +# Our network is wrapped in a :class:`torchrl.modules.QValueActor`, +# which will read the state-action +# values, pick up the one with the maximum value and write all those results +# in the input :class:`tensordict.TensorDict`. +# - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) - # init actor: because the model is composed of lazy conv/linear layers, - # we must pass a fake batch of data through it to instantiate them. 
- tensordict = dummy_env.fake_tensordict() - actor(tensordict) +def make_model(dummy_env): + cnn_kwargs = { + "num_cells": [32, 64, 64], + "kernel_sizes": [6, 4, 3], + "strides": [2, 2, 1], + "activation_class": nn.ELU, + # This can be used to reduce the size of the last layer of the CNN + # "squeeze_output": True, + # "aggregator_class": nn.AdaptiveAvgPool2d, + # "aggregator_kwargs": {"output_size": (1, 1)}, + } + mlp_kwargs = { + "depth": 2, + "num_cells": [ + 64, + 64, + ], + "activation_class": nn.ELU, + } + net = DuelingCnnDQNet( + dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs + ).to(device) + net.value[-1].bias.data.fill_(init_bias) - # we wrap our actor in an EGreedyWrapper for data collection - actor_explore = EGreedyWrapper( - actor, - annealing_num_steps=total_frames, - eps_init=eps_greedy_val, - eps_end=eps_greedy_val_env, - ) + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) + # init actor: because the model is composed of lazy conv/linear layers, + # we must pass a fake batch of data through it to instantiate them. + tensordict = dummy_env.fake_tensordict() + actor(tensordict) - return actor, actor_explore + # we wrap our actor in an EGreedyWrapper for data collection + actor_explore = EGreedyWrapper( + actor, + annealing_num_steps=total_frames, + eps_init=eps_greedy_val, + eps_end=eps_greedy_val_env, + ) + return actor, actor_explore - ############################################################################### - # Collecting and storing data - # --------------------------- - # - # Replay buffers - # ~~~~~~~~~~~~~~ - # - # Replay buffers play a central role in off-policy RL algorithms such as DQN. - # They constitute the dataset we will be sampling from during training. - # - # Here, we will use a regular sampling strategy, although a prioritized RB - # could improve the performance significantly. - # - # We place the storage on disk using - # :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This - # storage is created in a lazy manner: it will only be instantiated once the - # first batch of data is passed to it. - # - # The only requirement of this storage is that the data passed to it at write - # time must always have the same shape. - def get_replay_buffer(buffer_size, n_optim): - replay_buffer = TensorDictReplayBuffer( - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, - ) - return replay_buffer +############################################################################### +# Collecting and storing data +# --------------------------- +# +# Replay buffers +# ~~~~~~~~~~~~~~ +# +# Replay buffers play a central role in off-policy RL algorithms such as DQN. +# They constitute the dataset we will be sampling from during training. +# +# Here, we will use a regular sampling strategy, although a prioritized RB +# could improve the performance significantly. +# +# We place the storage on disk using +# :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This +# storage is created in a lazy manner: it will only be instantiated once the +# first batch of data is passed to it. +# +# The only requirement of this storage is that the data passed to it at write +# time must always have the same shape. - ############################################################################### - # Data collector - # ~~~~~~~~~~~~~~ - # - # As in `PPO ` and - # `DDPG `, we will be using - # a data collector as a dataloader in the outer loop. 
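###############################################################################
# The "collector as dataloader" idea can be sketched in isolation from the
# tutorial's pipeline: iterating over a collector yields TensorDict batches
# of transitions until ``total_frames`` is reached. The snippet below uses a
# single-process :class:`torchrl.collectors.SyncDataCollector` on a
# state-based CartPole for brevity; the batch shape reported may vary with
# the collector configuration.

from torchrl.collectors import SyncDataCollector
from torchrl.envs.libs.gym import GymEnv

sketch_collector = SyncDataCollector(
    GymEnv("CartPole-v1"),
    policy=None,  # a random policy is used when none is provided
    frames_per_batch=64,
    total_frames=128,
    split_trajs=False,
)
for batch in sketch_collector:
    print(batch.batch_size)  # number of frames collected in this batch
sketch_collector.shutdown()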
- # - # We choose the following configuration: we will be running a series of - # parallel environments synchronously in parallel in different collectors, - # themselves running in parallel but asynchronously. - # The advantage of this configuration is that we can balance the amount of - # compute that is executed in batch with what we want to be executed - # asynchronously. We encourage the reader to experiment how the collection - # speed is impacted by modifying the number of collectors (ie the number of - # environment constructors passed to the collector) and the number of - # environment executed in parallel in each collector (controlled by the - # ``num_workers`` hyperparameter). - # - # When building the collector, we can choose on which device we want the - # environment and policy to execute the operations through the ``device`` - # keyword argument. The ``storing_devices`` argument will modify the - # location of the data being collected: if the batches that we are gathering - # have a considerable size, we may want to store them on a different location - # than the device where the computation is happening. For asynchronous data - # collectors such as ours, different storing devices mean that the data that - # we collect won't sit on the same device each time, which is something that - # out training loop must account for. For simplicity, we set the devices to - # the same value for all sub-collectors. +def get_replay_buffer(buffer_size, n_optim): + replay_buffer = TensorDictReplayBuffer( + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, + ) + return replay_buffer - def get_collector(obs_norm_sd, num_collectors, actor_explore, frames_per_batch, total_frames, device): - data_collector = MultiaSyncDataCollector( - [ - make_env( - parallel=True, obs_norm_sd=obs_norm_sd - ), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, - total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. Below is an example of - # heterogeneous devices - device=device, - storing_device=device, - split_trajs=False, - ) - return data_collector +############################################################################### +# Data collector +# ~~~~~~~~~~~~~~ +# +# As in `PPO ` and +# `DDPG `, we will be using +# a data collector as a dataloader in the outer loop. +# +# We choose the following configuration: we will be running a series of +# parallel environments synchronously in parallel in different collectors, +# themselves running in parallel but asynchronously. +# The advantage of this configuration is that we can balance the amount of +# compute that is executed in batch with what we want to be executed +# asynchronously. We encourage the reader to experiment how the collection +# speed is impacted by modifying the number of collectors (ie the number of +# environment constructors passed to the collector) and the number of +# environment executed in parallel in each collector (controlled by the +# ``num_workers`` hyperparameter). +# +# When building the collector, we can choose on which device we want the +# environment and policy to execute the operations through the ``device`` +# keyword argument. 
The ``storing_devices`` argument will modify the +# location of the data being collected: if the batches that we are gathering +# have a considerable size, we may want to store them on a different location +# than the device where the computation is happening. For asynchronous data +# collectors such as ours, different storing devices mean that the data that +# we collect won't sit on the same device each time, which is something that +# out training loop must account for. For simplicity, we set the devices to +# the same value for all sub-collectors. - ############################################################################### - # Loss function - # ------------- - # - # Building our loss function is straightforward: we only need to provide - # the model and a bunch of hyperparameters to the DQNLoss class. - # - # Target parameters - # ~~~~~~~~~~~~~~~~~ - # - # Many off-policy RL algorithms use the concept of "target parameters" when it - # comes to estimate the value of the next state or state-action pair. - # The target parameters are lagged copies of the model parameters. Because - # their predictions mismatch those of the current model configuration, they - # help learning by putting a pessimistic bound on the value being estimated. - # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous - # in similar algorithms. - # +def get_collector(obs_norm_sd, num_collectors, actor_explore, frames_per_batch, total_frames, device): + data_collector = MultiaSyncDataCollector( + [ + make_env( + parallel=True, obs_norm_sd=obs_norm_sd + ), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, + ) + return data_collector - def get_loss_module(actor, gamma): - loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) - target_updater = SoftUpdate(loss_module) - return loss_module, target_updater +############################################################################### +# Loss function +# ------------- +# +# Building our loss function is straightforward: we only need to provide +# the model and a bunch of hyperparameters to the DQNLoss class. +# +# Target parameters +# ~~~~~~~~~~~~~~~~~ +# +# Many off-policy RL algorithms use the concept of "target parameters" when it +# comes to estimate the value of the next state or state-action pair. +# The target parameters are lagged copies of the model parameters. Because +# their predictions mismatch those of the current model configuration, they +# help learning by putting a pessimistic bound on the value being estimated. +# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous +# in similar algorithms. +# - ############################################################################### - # Hyperparameters - # --------------- - # - # Let's start with our hyperparameters. The following setting should work well - # in practice, and the performance of the algorithm should hopefully not be - # too sensitive to slight variations of these. 
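###############################################################################
# To make the "lagged parameters" idea above concrete, here is a tiny
# numerical sketch of the soft (Polyak) update rule that the target updater
# applies, up to implementation details: the target weights track an
# exponential moving average of the online weights.

import torch

theta = torch.ones(3)  # online parameters (dummy values)
theta_target = torch.zeros(3)  # lagged copy
tau = 0.005  # same role as the ``tau`` hyperparameter defined below

# one soft update: theta_target <- tau * theta + (1 - tau) * theta_target
theta_target = tau * theta + (1 - tau) * theta_target
print(theta_target)  # tensor([0.0050, 0.0050, 0.0050])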
+def get_loss_module(actor, gamma): + loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) + target_updater = SoftUpdate(loss_module) + return loss_module, target_updater - device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" +############################################################################### +# Hyperparameters +# --------------- +# +# Let's start with our hyperparameters. The following setting should work well +# in practice, and the performance of the algorithm should hopefully not be +# too sensitive to slight variations of these. - ############################################################################### - # Optimizer - # ~~~~~~~~~ +device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - # the learning rate of the optimizer - lr = 2e-3 - # weight decay - wd = 1e-5 - # the beta parameters of Adam - betas = (0.9, 0.999) - # Optimization steps per batch collected (aka UPD or updates per data) - n_optim = 8 +############################################################################### +# Optimizer +# ~~~~~~~~~ - ############################################################################### - # DQN parameters - # ~~~~~~~~~~~~~~ +# the learning rate of the optimizer +lr = 2e-3 +# weight decay +wd = 1e-5 +# the beta parameters of Adam +betas = (0.9, 0.999) +# Optimization steps per batch collected (aka UPD or updates per data) +n_optim = 8 - ############################################################################### - # gamma decay factor - gamma = 0.99 +############################################################################### +# DQN parameters +# ~~~~~~~~~~~~~~ - ############################################################################### - # lambda decay factor (see second the part with TD(:math:`\lambda`) - lmbda = 0.95 +############################################################################### +# gamma decay factor +gamma = 0.99 - ############################################################################### - # Smooth target network update decay parameter. - # This loosely corresponds to a 1/(1-tau) interval with hard target network - # update - tau = 0.005 +############################################################################### +# lambda decay factor (see second the part with TD(:math:`\lambda`) +lmbda = 0.95 - ############################################################################### - # Data collection and replay buffer - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Values to be used for proper training have been commented. - # - # Total frames collected in the environment. In other implementations, the - # user defines a maximum number of episodes. - # This is harder to do with our data collectors since they return batches - # of N collected frames, where N is a constant. - # However, one can easily get the same restriction on number of episodes by - # breaking the training loop when a certain number - # episodes has been collected. - total_frames = 4096 # 500000 +############################################################################### +# Smooth target network update decay parameter. +# This loosely corresponds to a 1/(1-tau) interval with hard target network +# update +tau = 0.005 - ############################################################################### - # Random frames used to initialize the replay buffer. 
- init_random_frames = 100 # 1000 +############################################################################### +# Data collection and replay buffer +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Values to be used for proper training have been commented. +# +# Total frames collected in the environment. In other implementations, the +# user defines a maximum number of episodes. +# This is harder to do with our data collectors since they return batches +# of N collected frames, where N is a constant. +# However, one can easily get the same restriction on number of episodes by +# breaking the training loop when a certain number +# episodes has been collected. +total_frames = 4096 # 500000 - ############################################################################### - # Frames in each batch collected. - frames_per_batch = 32 # 128 +############################################################################### +# Random frames used to initialize the replay buffer. +init_random_frames = 100 # 1000 - ############################################################################### - # Frames sampled from the replay buffer at each optimization step - batch_size = 32 # 256 +############################################################################### +# Frames in each batch collected. +frames_per_batch = 32 # 128 - ############################################################################### - # Size of the replay buffer in terms of frames - buffer_size = min(total_frames, 100000) +############################################################################### +# Frames sampled from the replay buffer at each optimization step +batch_size = 32 # 256 - ############################################################################### - # Number of environments run in parallel in each data collector - num_workers = 2 # 8 - num_collectors = 2 # 4 +############################################################################### +# Size of the replay buffer in terms of frames +buffer_size = min(total_frames, 100000) +############################################################################### +# Number of environments run in parallel in each data collector +num_workers = 2 # 8 +num_collectors = 2 # 4 - ############################################################################### - # Environment and exploration - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # We set the initial and final value of the epsilon factor in Epsilon-greedy - # exploration. - # Since our policy is deterministic, exploration is crucial: without it, the - # only source of randomness would be the environment reset. - eps_greedy_val = 0.1 - eps_greedy_val_env = 0.005 +############################################################################### +# Environment and exploration +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# We set the initial and final value of the epsilon factor in Epsilon-greedy +# exploration. +# Since our policy is deterministic, exploration is crucial: without it, the +# only source of randomness would be the environment reset. - ############################################################################### - # To speed up learning, we set the bias of the last layer of our value network - # to a predefined value (this is not mandatory) - init_bias = 2.0 +eps_greedy_val = 0.1 +eps_greedy_val_env = 0.005 - ############################################################################### - # .. note:: - # For fast rendering of the tutorial ``total_frames`` hyperparameter - # was set to a very low number. 
To get a reasonable performance, use a greater - # value e.g. 500000 - # +############################################################################### +# To speed up learning, we set the bias of the last layer of our value network +# to a predefined value (this is not mandatory) +init_bias = 2.0 - ############################################################################### - # Building a Trainer - # ------------------ - # - # TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the - # following keyword-only arguments: - # - # - ``collector`` - # - ``loss_module`` - # - ``optimizer`` - # - ``logger``: A logger can be - # - ``total_frames``: this parameter defines the lifespan of the trainer. - # - ``frame_skip``: when a frame-skip is used, the collector must be made - # aware of it in order to accurately count the number of frames - # collected etc. Making the trainer aware of this parameter is not - # mandatory but helps to have a fairer comparison between settings where - # the total number of frames (budget) is fixed but the frame-skip is - # variable. +############################################################################### +# .. note:: +# For fast rendering of the tutorial ``total_frames`` hyperparameter +# was set to a very low number. To get a reasonable performance, use a greater +# value e.g. 500000 +# - stats = get_norm_stats() - test_env = make_env(parallel=False, obs_norm_sd=stats) - # Get model - actor, actor_explore = make_model(test_env) - loss_module, target_net_updater = get_loss_module(actor, gamma) - collector = get_collector(stats, num_collectors, actor_explore, frames_per_batch, total_frames, device) - optimizer = torch.optim.Adam(loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas) - trainer = Trainer( - collector=collector, - total_frames=total_frames, - frame_skip=1, - loss_module=loss_module, - optimizer=optimizer, - logger=None, - optim_steps_per_batch = n_optim, - ) +############################################################################### +# Building a Trainer +# ------------------ +# +# TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the +# following keyword-only arguments: +# +# - ``collector`` +# - ``loss_module`` +# - ``optimizer`` +# - ``logger``: A logger can be +# - ``total_frames``: this parameter defines the lifespan of the trainer. +# - ``frame_skip``: when a frame-skip is used, the collector must be made +# aware of it in order to accurately count the number of frames +# collected etc. Making the trainer aware of this parameter is not +# mandatory but helps to have a fairer comparison between settings where +# the total number of frames (budget) is fixed but the frame-skip is +# variable. 
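+#
+# .. note::
+#   The trainer below is built with ``logger=None`` to keep the tutorial
+#   self-contained. As a hedged sketch of how a logger could be plugged in
+#   (the import path of the logger classes has changed across TorchRL
+#   versions, so treat the paths and arguments below as assumptions to be
+#   checked against your installed version):
+#
+#   .. code-block:: python
+#
+#      try:
+#          from torchrl.record.loggers import CSVLogger
+#      except ImportError:  # older layouts expose the loggers elsewhere
+#          from torchrl.trainers.loggers import CSVLogger
+#
+#      logger = CSVLogger(exp_name="dqn_tutorial")
+#      # ... then pass ``logger=logger`` instead of ``logger=None`` below.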
+ +stats = get_norm_stats() +test_env = make_env(parallel=False, obs_norm_sd=stats) +# Get model +actor, actor_explore = make_model(test_env) +loss_module, target_net_updater = get_loss_module(actor, gamma) +collector = get_collector(stats, num_collectors, actor_explore, frames_per_batch, total_frames, device) +optimizer = torch.optim.Adam(loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas) +trainer = Trainer( + collector=collector, + total_frames=total_frames, + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=None, + optim_steps_per_batch = n_optim, +) - buffer_hook = ReplayBufferTrainer(get_replay_buffer(buffer_size, n_optim)) - buffer_hook.register(trainer) - weight_updater = UpdateWeights(collector, update_weights_interval=1) - weight_updater.register(trainer) +buffer_hook = ReplayBufferTrainer(get_replay_buffer(buffer_size, n_optim)) +buffer_hook.register(trainer) +weight_updater = UpdateWeights(collector, update_weights_interval=1) +weight_updater.register(trainer) - trainer.register_op("post_optim", target_net_updater.step) +trainer.register_op("post_optim", target_net_updater.step) - trainer.train() +trainer.train() - # ############################################################################### - # # We represent the parameters and targets as flat structures, but unflattening - # # them is quite easy: - # - # params_flat = params.flatten_keys(".") - # - # ############################################################################### - # # We will be using the adam optimizer: - # - # optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) - # - # ############################################################################### - # # We create a test environment for evaluation of the policy: - # - # test_env = make_env( - # parallel=False, obs_norm_sd=obs_norm_sd - # ) - # # sanity check: - # print(actor_explore(test_env.reset())) - # - # ############################################################################### - # # Training loop of a regular DQN - # # ------------------------------ - # # - # # We'll start with a simple implementation of DQN where the returns are - # # computed without bootstrapping, i.e. - # # - # # .. math:: - # # - # # Q_{t}(s, a) = R(s, a) + \gamma * V_{t+1}(s) - # # - # # where :math:`Q(s, a)` is the Q-value of the current state-action pair, - # # :math:`R(s, a)` is the result of the reward function, and :math:`V(s)` is a - # # value function that returns 0 for terminating states. - # # - # # We store the logs in a defaultdict: - # - # logs_exp1 = defaultdict(list) - # prev_traj_count = 0 - # - # pbar = tqdm.tqdm(total=total_frames) - # for j, data in enumerate(data_collector): - # current_frames = data.numel() - # pbar.update(current_frames) - # data = data.view(-1) - # - # # We store the values on the replay buffer, after placing them on CPU. - # # When called for the first time, this will instantiate our storage - # # object which will print its content. 
- # replay_buffer.extend(data.cpu()) - # - # # some logging - # if len(logs_exp1["frames"]): - # logs_exp1["frames"].append(current_frames + logs_exp1["frames"][-1]) - # else: - # logs_exp1["frames"].append(current_frames) - # - # if data["next", "done"].any(): - # done = data["next", "done"].squeeze(-1) - # logs_exp1["traj_lengths"].append( - # data["next", "step_count"][done].float().mean().item() - # ) - # - # # check that we have enough data to start training - # if sum(logs_exp1["frames"]) > init_random_frames: - # for _ in range(n_optim): - # # sample from the RB and send to device - # sampled_data = replay_buffer.sample(batch_size) - # sampled_data = sampled_data.to(device, non_blocking=True) - # - # # collect data from RB - # reward = sampled_data["next", "reward"].squeeze(-1) - # done = sampled_data["next", "done"].squeeze(-1).to(reward.dtype) - # action = sampled_data["action"].clone() - # - # # Compute action value (of the action actually taken) at time t - # # By default, TorchRL uses one-hot encodings for discrete actions - # sampled_data_out = sampled_data.select(*actor.in_keys) - # sampled_data_out = factor(sampled_data_out, params=params) - # action_value = sampled_data_out["action_value"] - # action_value = (action_value * action.to(action_value.dtype)).sum(-1) - # with torch.no_grad(): - # # compute best action value for the next step, using target parameters - # tdstep = step_mdp(sampled_data) - # next_value = factor( - # tdstep.select(*actor.in_keys), - # params=params_target, - # )["chosen_action_value"].squeeze(-1) - # exp_value = reward + gamma * next_value * (1 - done) - # assert exp_value.shape == action_value.shape - # # we use MSE loss but L1 or smooth L1 should also work - # error = nn.functional.mse_loss(exp_value, action_value).mean() - # error.backward() - # - # gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) - # - # optim.step() - # optim.zero_grad() - # - # # update of the target parameters - # params_target.apply( - # lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), - # params.detach(), - # inplace=True, - # ) - # - # actor_explore.step(current_frames) - # - # # Logging - # logs_exp1["grad_vals"].append(float(gv)) - # logs_exp1["losses"].append(error.item()) - # logs_exp1["values"].append(action_value.mean().item()) - # logs_exp1["traj_count"].append( - # prev_traj_count + data["next", "done"].sum().item() - # ) - # prev_traj_count = logs_exp1["traj_count"][-1] - # - # if j % 10 == 0: - # with set_exploration_mode("mode"), torch.no_grad(): - # # execute a rollout. 
The `set_exploration_mode("mode")` has no effect here since the policy is deterministic, but we add it for completeness - # eval_rollout = test_env.rollout( - # max_steps=10000, - # policy=actor, - # ).cpu() - # logs_exp1["traj_lengths_eval"].append(eval_rollout.shape[-1]) - # logs_exp1["evals"].append(eval_rollout["next", "reward"].sum().item()) - # if len(logs_exp1["mavgs"]): - # logs_exp1["mavgs"].append( - # logs_exp1["evals"][-1] * 0.05 + logs_exp1["mavgs"][-1] * 0.95 - # ) - # else: - # logs_exp1["mavgs"].append(logs_exp1["evals"][-1]) - # logs_exp1["traj_count_eval"].append(logs_exp1["traj_count"][-1]) - # pbar.set_description( - # f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp1['evals'][-1]: 4.4f}" - # ) - # - # # update policy weights - # data_collector.update_policy_weights_() - # - # ############################################################################### - # # We write a custom plot function to display the performance of our algorithm - # # - # - # - # def plot(logs, name): - # plt.figure(figsize=(15, 10)) - # plt.subplot(2, 3, 1) - # plt.plot( - # logs["frames"][-len(logs["evals"]) :], - # logs["evals"], - # label="return (eval)", - # ) - # plt.plot( - # logs["frames"][-len(logs["mavgs"]) :], - # logs["mavgs"], - # label="mavg of returns (eval)", - # ) - # plt.xlabel("frames collected") - # plt.ylabel("trajectory length (= return)") - # plt.subplot(2, 3, 2) - # plt.plot( - # logs["traj_count"][-len(logs["evals"]) :], - # logs["evals"], - # label="return", - # ) - # plt.plot( - # logs["traj_count"][-len(logs["mavgs"]) :], - # logs["mavgs"], - # label="mavg", - # ) - # plt.xlabel("trajectories collected") - # plt.legend() - # plt.subplot(2, 3, 3) - # plt.plot(logs["frames"][-len(logs["losses"]) :], logs["losses"]) - # plt.xlabel("frames collected") - # plt.title("loss") - # plt.subplot(2, 3, 4) - # plt.plot(logs["frames"][-len(logs["values"]) :], logs["values"]) - # plt.xlabel("frames collected") - # plt.title("value") - # plt.subplot(2, 3, 5) - # plt.plot( - # logs["frames"][-len(logs["grad_vals"]) :], - # logs["grad_vals"], - # ) - # plt.xlabel("frames collected") - # plt.title("grad norm") - # if len(logs["traj_lengths"]): - # plt.subplot(2, 3, 6) - # plt.plot(logs["traj_lengths"]) - # plt.xlabel("batches") - # plt.title("traj length (training)") - # plt.savefig(name) - # if is_notebook(): - # plt.show() - # - # - # ############################################################################### - # # The performance of the policy can be measured as the length of trajectories. - # # As we can see on the results of the :func:`plot` function, the performance - # # of the policy increases, albeit slowly. - # # - # # .. code-block:: python - # # - # # plot(logs_exp1, "dqn_td0.png") - # # - # # .. figure:: /_static/img/dqn_td0.png - # # :alt: Cart Pole results with TD(0) - # # - # - # print("shutting down") - # data_collector.shutdown() - # del data_collector - # - # ############################################################################### - # # DQN with TD(:math:`\lambda`) - # # ---------------------------- - # # - # # We can improve the above algorithm by getting a better estimate of the - # # return, using not only the next state value but the whole sequence of rewards - # # and values that follow a particular step. - # # - # # TorchRL provides a vectorized version of TD(lambda) named - # # :func:`torchrl.objectives.value.functional.vec_td_lambda_advantage_estimate`. 
- # # We'll use this to obtain a target value that the value network will be - # # trained to match. - # # - # # The big difference in this implementation is that we'll store entire - # # trajectories and not single steps in the replay buffer. This will be done - # # automatically as long as we're not "flattening" the tensordict collected: - # # by keeping a shape ``[Batch x timesteps]`` and giving this - # # to the RB, we'll be creating a replay buffer of size - # # ``[Capacity x timesteps]``. - # - # - # from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate - # - # ############################################################################### - # # We reset the actor parameters: - # # - # - # ( - # factor, - # actor, - # actor_explore, - # params, - # params_target, - # ) = make_model(test_env) - # params_flat = params.flatten_keys(".") - # - # optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) - # test_env = make_env( - # parallel=False, obs_norm_sd=obs_norm_sd - # ) - # print(actor_explore(test_env.reset())) - # - # ############################################################################### - # # Data: Replay buffer and collector - # # --------------------------------- - # # - # # We need to build a new replay buffer of the appropriate size: - # # - # - # max_size = frames_per_batch // num_workers - # - # replay_buffer = TensorDictReplayBuffer( - # storage=LazyMemmapStorage(-(-buffer_size // max_size)), - # prefetch=n_optim, - # ) - # - # data_collector = MultiaSyncDataCollector( - # [ - # make_env( - # parallel=True, obs_norm_sd=obs_norm_sd - # ), - # ] - # * num_collectors, - # policy=actor_explore, - # frames_per_batch=frames_per_batch, - # total_frames=total_frames, - # exploration_mode="random", - # devices=[device] * num_collectors, - # storing_devices=[device] * num_collectors, - # # devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], - # # storing_devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], - # split_trajs=False, - # ) - # - # - # logs_exp2 = defaultdict(list) - # prev_traj_count = 0 - # - # ############################################################################### - # # Training loop - # # ------------- - # # - # # There are very few differences with the training loop above: - # # - # # - The tensordict received by the collector is used as-is, without being - # # flattened (recall the ``data.view(-1)`` above), to keep the temporal - # # relation between consecutive steps. - # # - We use :func:`vec_td_lambda_advantage_estimate` to compute the target - # # value. 
- # - # pbar = tqdm.tqdm(total=total_frames) - # for j, data in enumerate(data_collector): - # current_frames = data.numel() - # pbar.update(current_frames) - # - # replay_buffer.extend(data.cpu()) - # if len(logs_exp2["frames"]): - # logs_exp2["frames"].append(current_frames + logs_exp2["frames"][-1]) - # else: - # logs_exp2["frames"].append(current_frames) - # - # if data["next", "done"].any(): - # done = data["next", "done"].squeeze(-1) - # logs_exp2["traj_lengths"].append( - # data["next", "step_count"][done].float().mean().item() - # ) - # - # if sum(logs_exp2["frames"]) > init_random_frames: - # for _ in range(n_optim): - # sampled_data = replay_buffer.sample(batch_size // max_size) - # sampled_data = sampled_data.clone().to(device, non_blocking=True) - # - # reward = sampled_data["next", "reward"] - # done = sampled_data["next", "done"].to(reward.dtype) - # action = sampled_data["action"].clone() - # - # sampled_data_out = sampled_data.select(*actor.in_keys) - # sampled_data_out = vmap(factor, (0, None))(sampled_data_out, params) - # action_value = sampled_data_out["action_value"] - # action_value = (action_value * action.to(action_value.dtype)).sum(-1, True) - # with torch.no_grad(): - # tdstep = step_mdp(sampled_data) - # next_value = vmap(factor, (0, None))( - # tdstep.select(*actor.in_keys), params - # ) - # next_value = next_value["chosen_action_value"] - # error = vec_td_lambda_advantage_estimate( - # gamma, - # lmbda, - # action_value, - # next_value, - # reward, - # done, - # ).pow(2) - # error = error.mean() - # error.backward() - # - # gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) - # - # optim.step() - # optim.zero_grad() - # - # # update of the target parameters - # params_target.apply( - # lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), - # params.detach(), - # inplace=True, - # ) - # - # actor_explore.step(current_frames) - # - # # Logging - # logs_exp2["grad_vals"].append(float(gv)) - # - # logs_exp2["losses"].append(error.item()) - # logs_exp2["values"].append(action_value.mean().item()) - # logs_exp2["traj_count"].append( - # prev_traj_count + data["next", "done"].sum().item() - # ) - # prev_traj_count = logs_exp2["traj_count"][-1] - # if j % 10 == 0: - # with set_exploration_mode("mode"), torch.no_grad(): - # # execute a rollout. The `set_exploration_mode("mode")` has - # # no effect here since the policy is deterministic, but we add - # # it for completeness - # eval_rollout = test_env.rollout( - # max_steps=10000, - # policy=actor, - # ).cpu() - # logs_exp2["traj_lengths_eval"].append(eval_rollout.shape[-1]) - # logs_exp2["evals"].append(eval_rollout["next", "reward"].sum().item()) - # if len(logs_exp2["mavgs"]): - # logs_exp2["mavgs"].append( - # logs_exp2["evals"][-1] * 0.05 + logs_exp2["mavgs"][-1] * 0.95 - # ) - # else: - # logs_exp2["mavgs"].append(logs_exp2["evals"][-1]) - # logs_exp2["traj_count_eval"].append(logs_exp2["traj_count"][-1]) - # pbar.set_description( - # f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp2['evals'][-1]: 4.4f}" - # ) - # - # # update policy weights - # data_collector.update_policy_weights_() - # - # - # ############################################################################### - # # TD(:math:`\lambda`) performs significantly better than TD(0) because it - # # retrieves a much less biased estimate of the state-action value. - # # - # # .. code-block:: python - # # - # # plot(logs_exp2, "dqn_tdlambda.png") - # # - # # .. 
figure:: /_static/img/dqn_tdlambda.png - # # :alt: Cart Pole results with TD(lambda) - # # - # - # - # print("shutting down") - # data_collector.shutdown() - # del data_collector - # - # ############################################################################### - # # Let's compare the results on a single plot. Because the TD(lambda) version - # # works better, we'll have fewer episodes collected for a given number of - # # frames (as there are more frames per episode). - # # - # # **Note**: As already mentioned above, to get a more reasonable performance, - # # use a greater value for ``total_frames`` e.g. 500000. - # - # - # def plot_both(): - # frames_td0 = logs_exp1["frames"] - # frames_tdlambda = logs_exp2["frames"] - # evals_td0 = logs_exp1["evals"] - # evals_tdlambda = logs_exp2["evals"] - # mavgs_td0 = logs_exp1["mavgs"] - # mavgs_tdlambda = logs_exp2["mavgs"] - # traj_count_td0 = logs_exp1["traj_count_eval"] - # traj_count_tdlambda = logs_exp2["traj_count_eval"] - # - # plt.figure(figsize=(15, 10)) - # plt.subplot(1, 2, 1) - # plt.plot(frames_td0[-len(evals_td0) :], evals_td0, label="return (td0)", alpha=0.5) - # plt.plot( - # frames_tdlambda[-len(evals_tdlambda) :], - # evals_tdlambda, - # label="return (td(lambda))", - # alpha=0.5, - # ) - # plt.plot(frames_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") - # plt.plot( - # frames_tdlambda[-len(mavgs_tdlambda) :], - # mavgs_tdlambda, - # label="mavg (td(lambda))", - # ) - # plt.xlabel("frames collected") - # plt.ylabel("trajectory length (= return)") - # - # plt.subplot(1, 2, 2) - # plt.plot( - # traj_count_td0[-len(evals_td0) :], - # evals_td0, - # label="return (td0)", - # alpha=0.5, - # ) - # plt.plot( - # traj_count_tdlambda[-len(evals_tdlambda) :], - # evals_tdlambda, - # label="return (td(lambda))", - # alpha=0.5, - # ) - # plt.plot(traj_count_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") - # plt.plot( - # traj_count_tdlambda[-len(mavgs_tdlambda) :], - # mavgs_tdlambda, - # label="mavg (td(lambda))", - # ) - # plt.xlabel("trajectories collected") - # plt.legend() - # - # plt.savefig("dqn.png") - # - # - # ############################################################################### - # # .. code-block:: python - # # - # # plot_both() - # # - # # .. figure:: /_static/img/dqn.png - # # :alt: Cart Pole results from the TD(:math:`lambda`) trained policy. - # # - # # Finally, we generate a new video to check what the algorithm has learnt. - # # If all goes well, the duration should be significantly longer than with a - # # random rollout. - # # - # # To get the raw pixels of the rollout, we insert a - # # :class:`torchrl.envs.CatTensors` transform that precedes all others and copies - # # the ``"pixels"`` key onto a ``"pixels_save"`` key. This is necessary because - # # the other transforms that modify this key will update its value in-place in - # # the output tensordict. - # # - # - # test_env.transform.insert(0, CatTensors(["pixels"], "pixels_save", del_keys=False)) - # eval_rollout = test_env.rollout(max_steps=10000, policy=actor, auto_reset=True).cpu() - # - # # sphinx_gallery_start_ignore - # import imageio - # - # imageio.mimwrite("cartpole.gif", eval_rollout["pixels_save"].numpy(), fps=30) - # # sphinx_gallery_end_ignore - # - # del test_env - # - # ############################################################################### - # # The video of the rollout can be saved using the imageio package: - # # - # # .. 
code-block:: - # # - # # import imageio - # # imageio.mimwrite('cartpole.mp4', eval_rollout["pixels_save"].numpy(), fps=30); - # # - # # .. figure:: /_static/img/cartpole.gif - # # :alt: Cart Pole results from the TD(:math:`\lambda`) trained policy. - # - # ############################################################################### - # # Conclusion and possible improvements - # # ------------------------------------ - # # - # # In this tutorial we have learnt: - # # - # # - How to train a policy that read pixel-based states, what transforms to - # # include and how to normalize the data; - # # - How to create a policy that picks up the action with the highest value - # # with :class:`torchrl.modules.QValueNetwork`; - # # - How to build a multiprocessed data collector; - # # - How to train a DQN with TD(:math:`\lambda`) returns. - # # - # # We have seen that using TD(:math:`\lambda`) greatly improved the performance - # # of DQN. Other possible improvements could include: - # # - # # - Using the Multi-Step post-processing. Multi-step will project an action - # # to the nth following step, and create a discounted sum of the rewards in - # # between. This trick can make the algorithm noticebly less myopic. To use - # # this, simply create the collector with - # # - # # from torchrl.data.postprocs.postprocs import MultiStep - # # collector = CollectorClass(..., postproc=MultiStep(gamma, n)) - # # - # # where ``n`` is the number of looking-forward steps. Pay attention to the - # # fact that the ``gamma`` factor has to be corrected by the number of - # # steps till the next observation when being passed to - # # ``vec_td_lambda_advantage_estimate``: - # # - # # gamma = gamma ** tensordict["steps_to_next_obs"] - # # - A prioritized replay buffer could also be used. This will give a - # # higher priority to samples that have the worst value accuracy. - # # - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` - # # for more information). - # # - More fancy exploration techniques, such as NoisyLinear layers and such - # # (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the - # # ``MLP`` class used in our Dueling DQN). +# ############################################################################### +# # We represent the parameters and targets as flat structures, but unflattening +# # them is quite easy: +# +# params_flat = params.flatten_keys(".") +# +# ############################################################################### +# # We will be using the adam optimizer: +# +# optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) +# +# ############################################################################### +# # We create a test environment for evaluation of the policy: +# +# test_env = make_env( +# parallel=False, obs_norm_sd=obs_norm_sd +# ) +# # sanity check: +# print(actor_explore(test_env.reset())) +# +# ############################################################################### +# # Training loop of a regular DQN +# # ------------------------------ +# # +# # We'll start with a simple implementation of DQN where the returns are +# # computed without bootstrapping, i.e. +# # +# # .. math:: +# # +# # Q_{t}(s, a) = R(s, a) + \gamma * V_{t+1}(s) +# # +# # where :math:`Q(s, a)` is the Q-value of the current state-action pair, +# # :math:`R(s, a)` is the result of the reward function, and :math:`V(s)` is a +# # value function that returns 0 for terminating states. 
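+# #
+# # As a small numerical illustration of that target (the numbers are made up
+# # for this example; the masking by ``(1 - done)`` is what zeroes the
+# # bootstrap term at terminating states):
+# #
+# # .. code-block:: python
+# #
+# #   import torch
+# #
+# #   gamma = 0.99
+# #   reward = torch.tensor([1.0, 1.0, 1.0])
+# #   next_value = torch.tensor([10.0, 5.0, 3.0])
+# #   done = torch.tensor([0.0, 0.0, 1.0])
+# #   target = reward + gamma * next_value * (1 - done)
+# #   # -> approximately [10.90, 5.95, 1.00]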
+# # +# # We store the logs in a defaultdict: +# +# logs_exp1 = defaultdict(list) +# prev_traj_count = 0 +# +# pbar = tqdm.tqdm(total=total_frames) +# for j, data in enumerate(data_collector): +# current_frames = data.numel() +# pbar.update(current_frames) +# data = data.view(-1) +# +# # We store the values on the replay buffer, after placing them on CPU. +# # When called for the first time, this will instantiate our storage +# # object which will print its content. +# replay_buffer.extend(data.cpu()) +# +# # some logging +# if len(logs_exp1["frames"]): +# logs_exp1["frames"].append(current_frames + logs_exp1["frames"][-1]) +# else: +# logs_exp1["frames"].append(current_frames) +# +# if data["next", "done"].any(): +# done = data["next", "done"].squeeze(-1) +# logs_exp1["traj_lengths"].append( +# data["next", "step_count"][done].float().mean().item() +# ) +# +# # check that we have enough data to start training +# if sum(logs_exp1["frames"]) > init_random_frames: +# for _ in range(n_optim): +# # sample from the RB and send to device +# sampled_data = replay_buffer.sample(batch_size) +# sampled_data = sampled_data.to(device, non_blocking=True) +# +# # collect data from RB +# reward = sampled_data["next", "reward"].squeeze(-1) +# done = sampled_data["next", "done"].squeeze(-1).to(reward.dtype) +# action = sampled_data["action"].clone() +# +# # Compute action value (of the action actually taken) at time t +# # By default, TorchRL uses one-hot encodings for discrete actions +# sampled_data_out = sampled_data.select(*actor.in_keys) +# sampled_data_out = factor(sampled_data_out, params=params) +# action_value = sampled_data_out["action_value"] +# action_value = (action_value * action.to(action_value.dtype)).sum(-1) +# with torch.no_grad(): +# # compute best action value for the next step, using target parameters +# tdstep = step_mdp(sampled_data) +# next_value = factor( +# tdstep.select(*actor.in_keys), +# params=params_target, +# )["chosen_action_value"].squeeze(-1) +# exp_value = reward + gamma * next_value * (1 - done) +# assert exp_value.shape == action_value.shape +# # we use MSE loss but L1 or smooth L1 should also work +# error = nn.functional.mse_loss(exp_value, action_value).mean() +# error.backward() +# +# gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) +# +# optim.step() +# optim.zero_grad() +# +# # update of the target parameters +# params_target.apply( +# lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), +# params.detach(), +# inplace=True, +# ) +# +# actor_explore.step(current_frames) +# +# # Logging +# logs_exp1["grad_vals"].append(float(gv)) +# logs_exp1["losses"].append(error.item()) +# logs_exp1["values"].append(action_value.mean().item()) +# logs_exp1["traj_count"].append( +# prev_traj_count + data["next", "done"].sum().item() +# ) +# prev_traj_count = logs_exp1["traj_count"][-1] +# +# if j % 10 == 0: +# with set_exploration_mode("mode"), torch.no_grad(): +# # execute a rollout. 
The `set_exploration_mode("mode")` has no effect here since the policy is deterministic, but we add it for completeness +# eval_rollout = test_env.rollout( +# max_steps=10000, +# policy=actor, +# ).cpu() +# logs_exp1["traj_lengths_eval"].append(eval_rollout.shape[-1]) +# logs_exp1["evals"].append(eval_rollout["next", "reward"].sum().item()) +# if len(logs_exp1["mavgs"]): +# logs_exp1["mavgs"].append( +# logs_exp1["evals"][-1] * 0.05 + logs_exp1["mavgs"][-1] * 0.95 +# ) +# else: +# logs_exp1["mavgs"].append(logs_exp1["evals"][-1]) +# logs_exp1["traj_count_eval"].append(logs_exp1["traj_count"][-1]) +# pbar.set_description( +# f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp1['evals'][-1]: 4.4f}" +# ) +# +# # update policy weights +# data_collector.update_policy_weights_() +# +# ############################################################################### +# # We write a custom plot function to display the performance of our algorithm +# # +# +# +# def plot(logs, name): +# plt.figure(figsize=(15, 10)) +# plt.subplot(2, 3, 1) +# plt.plot( +# logs["frames"][-len(logs["evals"]) :], +# logs["evals"], +# label="return (eval)", +# ) +# plt.plot( +# logs["frames"][-len(logs["mavgs"]) :], +# logs["mavgs"], +# label="mavg of returns (eval)", +# ) +# plt.xlabel("frames collected") +# plt.ylabel("trajectory length (= return)") +# plt.subplot(2, 3, 2) +# plt.plot( +# logs["traj_count"][-len(logs["evals"]) :], +# logs["evals"], +# label="return", +# ) +# plt.plot( +# logs["traj_count"][-len(logs["mavgs"]) :], +# logs["mavgs"], +# label="mavg", +# ) +# plt.xlabel("trajectories collected") +# plt.legend() +# plt.subplot(2, 3, 3) +# plt.plot(logs["frames"][-len(logs["losses"]) :], logs["losses"]) +# plt.xlabel("frames collected") +# plt.title("loss") +# plt.subplot(2, 3, 4) +# plt.plot(logs["frames"][-len(logs["values"]) :], logs["values"]) +# plt.xlabel("frames collected") +# plt.title("value") +# plt.subplot(2, 3, 5) +# plt.plot( +# logs["frames"][-len(logs["grad_vals"]) :], +# logs["grad_vals"], +# ) +# plt.xlabel("frames collected") +# plt.title("grad norm") +# if len(logs["traj_lengths"]): +# plt.subplot(2, 3, 6) +# plt.plot(logs["traj_lengths"]) +# plt.xlabel("batches") +# plt.title("traj length (training)") +# plt.savefig(name) +# if is_notebook(): +# plt.show() +# +# +# ############################################################################### +# # The performance of the policy can be measured as the length of trajectories. +# # As we can see on the results of the :func:`plot` function, the performance +# # of the policy increases, albeit slowly. +# # +# # .. code-block:: python +# # +# # plot(logs_exp1, "dqn_td0.png") +# # +# # .. figure:: /_static/img/dqn_td0.png +# # :alt: Cart Pole results with TD(0) +# # +# +# print("shutting down") +# data_collector.shutdown() +# del data_collector +# +# ############################################################################### +# # DQN with TD(:math:`\lambda`) +# # ---------------------------- +# # +# # We can improve the above algorithm by getting a better estimate of the +# # return, using not only the next state value but the whole sequence of rewards +# # and values that follow a particular step. +# # +# # TorchRL provides a vectorized version of TD(lambda) named +# # :func:`torchrl.objectives.value.functional.vec_td_lambda_advantage_estimate`. +# # We'll use this to obtain a target value that the value network will be +# # trained to match. 
+# # +# # The big difference in this implementation is that we'll store entire +# # trajectories and not single steps in the replay buffer. This will be done +# # automatically as long as we're not "flattening" the tensordict collected: +# # by keeping a shape ``[Batch x timesteps]`` and giving this +# # to the RB, we'll be creating a replay buffer of size +# # ``[Capacity x timesteps]``. +# +# +# from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate +# +# ############################################################################### +# # We reset the actor parameters: +# # +# +# ( +# factor, +# actor, +# actor_explore, +# params, +# params_target, +# ) = make_model(test_env) +# params_flat = params.flatten_keys(".") +# +# optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) +# test_env = make_env( +# parallel=False, obs_norm_sd=obs_norm_sd +# ) +# print(actor_explore(test_env.reset())) +# +# ############################################################################### +# # Data: Replay buffer and collector +# # --------------------------------- +# # +# # We need to build a new replay buffer of the appropriate size: +# # +# +# max_size = frames_per_batch // num_workers +# +# replay_buffer = TensorDictReplayBuffer( +# storage=LazyMemmapStorage(-(-buffer_size // max_size)), +# prefetch=n_optim, +# ) +# +# data_collector = MultiaSyncDataCollector( +# [ +# make_env( +# parallel=True, obs_norm_sd=obs_norm_sd +# ), +# ] +# * num_collectors, +# policy=actor_explore, +# frames_per_batch=frames_per_batch, +# total_frames=total_frames, +# exploration_mode="random", +# devices=[device] * num_collectors, +# storing_devices=[device] * num_collectors, +# # devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], +# # storing_devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], +# split_trajs=False, +# ) +# +# +# logs_exp2 = defaultdict(list) +# prev_traj_count = 0 +# +# ############################################################################### +# # Training loop +# # ------------- +# # +# # There are very few differences with the training loop above: +# # +# # - The tensordict received by the collector is used as-is, without being +# # flattened (recall the ``data.view(-1)`` above), to keep the temporal +# # relation between consecutive steps. +# # - We use :func:`vec_td_lambda_advantage_estimate` to compute the target +# # value. 
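+# #
+# # As a quick, shape-oriented sketch of that call (the tensor shapes and the
+# # random inputs below are assumptions of this example; the argument order is
+# # the one used in the training loop that follows):
+# #
+# # .. code-block:: python
+# #
+# #   import torch
+# #   from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate
+# #
+# #   B, T = 4, 16  # trajectories x time steps, as stored in the replay buffer
+# #   value = torch.randn(B, T, 1)       # Q(s, a) of the action actually taken
+# #   next_value = torch.randn(B, T, 1)  # best next action value (target params)
+# #   reward = torch.randn(B, T, 1)
+# #   done = torch.zeros(B, T, 1)
+# #   adv = vec_td_lambda_advantage_estimate(0.99, 0.95, value, next_value, reward, done)
+# #   # ``adv`` keeps the batch/time layout of the inputs and is squared to form the loss below.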
+# +# pbar = tqdm.tqdm(total=total_frames) +# for j, data in enumerate(data_collector): +# current_frames = data.numel() +# pbar.update(current_frames) +# +# replay_buffer.extend(data.cpu()) +# if len(logs_exp2["frames"]): +# logs_exp2["frames"].append(current_frames + logs_exp2["frames"][-1]) +# else: +# logs_exp2["frames"].append(current_frames) +# +# if data["next", "done"].any(): +# done = data["next", "done"].squeeze(-1) +# logs_exp2["traj_lengths"].append( +# data["next", "step_count"][done].float().mean().item() +# ) +# +# if sum(logs_exp2["frames"]) > init_random_frames: +# for _ in range(n_optim): +# sampled_data = replay_buffer.sample(batch_size // max_size) +# sampled_data = sampled_data.clone().to(device, non_blocking=True) +# +# reward = sampled_data["next", "reward"] +# done = sampled_data["next", "done"].to(reward.dtype) +# action = sampled_data["action"].clone() +# +# sampled_data_out = sampled_data.select(*actor.in_keys) +# sampled_data_out = vmap(factor, (0, None))(sampled_data_out, params) +# action_value = sampled_data_out["action_value"] +# action_value = (action_value * action.to(action_value.dtype)).sum(-1, True) +# with torch.no_grad(): +# tdstep = step_mdp(sampled_data) +# next_value = vmap(factor, (0, None))( +# tdstep.select(*actor.in_keys), params +# ) +# next_value = next_value["chosen_action_value"] +# error = vec_td_lambda_advantage_estimate( +# gamma, +# lmbda, +# action_value, +# next_value, +# reward, +# done, +# ).pow(2) +# error = error.mean() +# error.backward() +# +# gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) +# +# optim.step() +# optim.zero_grad() +# +# # update of the target parameters +# params_target.apply( +# lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), +# params.detach(), +# inplace=True, +# ) +# +# actor_explore.step(current_frames) +# +# # Logging +# logs_exp2["grad_vals"].append(float(gv)) +# +# logs_exp2["losses"].append(error.item()) +# logs_exp2["values"].append(action_value.mean().item()) +# logs_exp2["traj_count"].append( +# prev_traj_count + data["next", "done"].sum().item() +# ) +# prev_traj_count = logs_exp2["traj_count"][-1] +# if j % 10 == 0: +# with set_exploration_mode("mode"), torch.no_grad(): +# # execute a rollout. The `set_exploration_mode("mode")` has +# # no effect here since the policy is deterministic, but we add +# # it for completeness +# eval_rollout = test_env.rollout( +# max_steps=10000, +# policy=actor, +# ).cpu() +# logs_exp2["traj_lengths_eval"].append(eval_rollout.shape[-1]) +# logs_exp2["evals"].append(eval_rollout["next", "reward"].sum().item()) +# if len(logs_exp2["mavgs"]): +# logs_exp2["mavgs"].append( +# logs_exp2["evals"][-1] * 0.05 + logs_exp2["mavgs"][-1] * 0.95 +# ) +# else: +# logs_exp2["mavgs"].append(logs_exp2["evals"][-1]) +# logs_exp2["traj_count_eval"].append(logs_exp2["traj_count"][-1]) +# pbar.set_description( +# f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp2['evals'][-1]: 4.4f}" +# ) +# +# # update policy weights +# data_collector.update_policy_weights_() +# +# +# ############################################################################### +# # TD(:math:`\lambda`) performs significantly better than TD(0) because it +# # retrieves a much less biased estimate of the state-action value. +# # +# # .. code-block:: python +# # +# # plot(logs_exp2, "dqn_tdlambda.png") +# # +# # .. 
figure:: /_static/img/dqn_tdlambda.png +# # :alt: Cart Pole results with TD(lambda) +# # +# +# +# print("shutting down") +# data_collector.shutdown() +# del data_collector +# +# ############################################################################### +# # Let's compare the results on a single plot. Because the TD(lambda) version +# # works better, we'll have fewer episodes collected for a given number of +# # frames (as there are more frames per episode). +# # +# # **Note**: As already mentioned above, to get a more reasonable performance, +# # use a greater value for ``total_frames`` e.g. 500000. +# +# +# def plot_both(): +# frames_td0 = logs_exp1["frames"] +# frames_tdlambda = logs_exp2["frames"] +# evals_td0 = logs_exp1["evals"] +# evals_tdlambda = logs_exp2["evals"] +# mavgs_td0 = logs_exp1["mavgs"] +# mavgs_tdlambda = logs_exp2["mavgs"] +# traj_count_td0 = logs_exp1["traj_count_eval"] +# traj_count_tdlambda = logs_exp2["traj_count_eval"] +# +# plt.figure(figsize=(15, 10)) +# plt.subplot(1, 2, 1) +# plt.plot(frames_td0[-len(evals_td0) :], evals_td0, label="return (td0)", alpha=0.5) +# plt.plot( +# frames_tdlambda[-len(evals_tdlambda) :], +# evals_tdlambda, +# label="return (td(lambda))", +# alpha=0.5, +# ) +# plt.plot(frames_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") +# plt.plot( +# frames_tdlambda[-len(mavgs_tdlambda) :], +# mavgs_tdlambda, +# label="mavg (td(lambda))", +# ) +# plt.xlabel("frames collected") +# plt.ylabel("trajectory length (= return)") +# +# plt.subplot(1, 2, 2) +# plt.plot( +# traj_count_td0[-len(evals_td0) :], +# evals_td0, +# label="return (td0)", +# alpha=0.5, +# ) +# plt.plot( +# traj_count_tdlambda[-len(evals_tdlambda) :], +# evals_tdlambda, +# label="return (td(lambda))", +# alpha=0.5, +# ) +# plt.plot(traj_count_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") +# plt.plot( +# traj_count_tdlambda[-len(mavgs_tdlambda) :], +# mavgs_tdlambda, +# label="mavg (td(lambda))", +# ) +# plt.xlabel("trajectories collected") +# plt.legend() +# +# plt.savefig("dqn.png") +# +# +# ############################################################################### +# # .. code-block:: python +# # +# # plot_both() +# # +# # .. figure:: /_static/img/dqn.png +# # :alt: Cart Pole results from the TD(:math:`lambda`) trained policy. +# # +# # Finally, we generate a new video to check what the algorithm has learnt. +# # If all goes well, the duration should be significantly longer than with a +# # random rollout. +# # +# # To get the raw pixels of the rollout, we insert a +# # :class:`torchrl.envs.CatTensors` transform that precedes all others and copies +# # the ``"pixels"`` key onto a ``"pixels_save"`` key. This is necessary because +# # the other transforms that modify this key will update its value in-place in +# # the output tensordict. +# # +# +# test_env.transform.insert(0, CatTensors(["pixels"], "pixels_save", del_keys=False)) +# eval_rollout = test_env.rollout(max_steps=10000, policy=actor, auto_reset=True).cpu() +# +# # sphinx_gallery_start_ignore +# import imageio +# +# imageio.mimwrite("cartpole.gif", eval_rollout["pixels_save"].numpy(), fps=30) +# # sphinx_gallery_end_ignore +# +# del test_env +# +# ############################################################################### +# # The video of the rollout can be saved using the imageio package: +# # +# # .. code-block:: +# # +# # import imageio +# # imageio.mimwrite('cartpole.mp4', eval_rollout["pixels_save"].numpy(), fps=30); +# # +# # .. 
figure:: /_static/img/cartpole.gif +# # :alt: Cart Pole results from the TD(:math:`\lambda`) trained policy. +# +# ############################################################################### +# # Conclusion and possible improvements +# # ------------------------------------ +# # +# # In this tutorial we have learnt: +# # +# # - How to train a policy that read pixel-based states, what transforms to +# # include and how to normalize the data; +# # - How to create a policy that picks up the action with the highest value +# # with :class:`torchrl.modules.QValueNetwork`; +# # - How to build a multiprocessed data collector; +# # - How to train a DQN with TD(:math:`\lambda`) returns. +# # +# # We have seen that using TD(:math:`\lambda`) greatly improved the performance +# # of DQN. Other possible improvements could include: +# # +# # - Using the Multi-Step post-processing. Multi-step will project an action +# # to the nth following step, and create a discounted sum of the rewards in +# # between. This trick can make the algorithm noticebly less myopic. To use +# # this, simply create the collector with +# # +# # from torchrl.data.postprocs.postprocs import MultiStep +# # collector = CollectorClass(..., postproc=MultiStep(gamma, n)) +# # +# # where ``n`` is the number of looking-forward steps. Pay attention to the +# # fact that the ``gamma`` factor has to be corrected by the number of +# # steps till the next observation when being passed to +# # ``vec_td_lambda_advantage_estimate``: +# # +# # gamma = gamma ** tensordict["steps_to_next_obs"] +# # - A prioritized replay buffer could also be used. This will give a +# # higher priority to samples that have the worst value accuracy. +# # - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` +# # for more information). +# # - More fancy exploration techniques, such as NoisyLinear layers and such +# # (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the +# # ``MLP`` class used in our Dueling DQN). From fb81fc318f97faf032c6ddf8c760e24f3fadbd73 Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 24 Mar 2023 17:10:33 +0000 Subject: [PATCH 09/89] empty From d13704849087fc8c36b2258c6c0d73eed9eee113 Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 23 Mar 2023 17:31:38 +0000 Subject: [PATCH 10/89] init --- torchrl/modules/tensordict_module/sequence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchrl/modules/tensordict_module/sequence.py b/torchrl/modules/tensordict_module/sequence.py index ee7c181db5d..94bc3aa4970 100644 --- a/torchrl/modules/tensordict_module/sequence.py +++ b/torchrl/modules/tensordict_module/sequence.py @@ -13,7 +13,7 @@ class SafeSequential(TensorDictSequential, SafeModule): - """A sequence of TensorDictModules. + """A safe sequence of TensorDictModules. Similarly to :obj:`nn.Sequence` which passes a tensor through a chain of mappings that read and write a single tensor each, this module will read and write over a tensordict by querying each of the input modules. From adad97d884bac1ca7e7a5b5cc5d4ebc3d19eda11 Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 23 Mar 2023 16:41:18 +0000 Subject: [PATCH 11/89] init --- torchrl/objectives/common.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 3db3df0e9cd..5ff7c171d7a 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -37,9 +37,12 @@ class LossModule(nn.Module): """A parent class for RL losses. 
- LossModule inherits from nn.Module. It is designed to read an input TensorDict and return another tensordict - with loss keys named "loss_*". - Splitting the loss in its component can then be used by the trainer to log the various loss values throughout + LossModule inherits from nn.Module. It is designed to read an input + TensorDict and return another tensordict + with loss keys named ``"loss_*"``. + + Splitting the loss in its component can then be used by the trainer to log + the various loss values throughout training. Other scalars present in the output tensordict will be logged too. """ @@ -75,6 +78,8 @@ def convert_to_functional( compare_against: Optional[List[Parameter]] = None, funs_to_decorate=None, ) -> None: + """Converts a module to functional to be used in the loss. + """ if funs_to_decorate is None: funs_to_decorate = ["forward"] # To make it robust to device casting, we must register list of From ea206037540abb02630cea02a86fe28339bb376d Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 24 Mar 2023 08:39:17 +0000 Subject: [PATCH 12/89] amend --- docs/source/reference/objectives.rst | 1 + torchrl/objectives/common.py | 10 +++++ torchrl/objectives/dqn.py | 54 ++++++++++++++++++-------- torchrl/objectives/utils.py | 3 ++ torchrl/objectives/value/__init__.py | 2 +- torchrl/objectives/value/advantages.py | 41 +++++++++++++++++-- 6 files changed, 89 insertions(+), 22 deletions(-) diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index 84b7d0a2cb7..2a7a2f92be7 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -108,6 +108,7 @@ Returns :toctree: generated/ :template: rl_template_noinherit.rst + ValueFunctionBase GAE TDLambdaEstimate TDEstimate diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 5ff7c171d7a..d3be7b75c3e 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -18,6 +18,7 @@ from torch.nn import Parameter from torchrl.modules.utils import Buffer +from torchrl.objectives.value import ValueFunctionBase _has_functorch = False try: @@ -353,3 +354,12 @@ def half(self) -> LossModule: def cpu(self) -> LossModule: return self.to(torch.device("cpu")) + + def _default_value_function(self) -> ValueFunctionBase: + """A value-function constructor when none is provided. + + No kwarg should be present as default parameters should be retrieved + from :obj:`torchrl.objectives.utils.DEFAULT_VALUE_FUN_PARAMS`. + + """ + raise NotImplementedError diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index d79f202fca4..05ec29fe492 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -3,11 +3,11 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-from typing import Union +from typing import Union, Optional import torch -from tensordict import TensorDict -from tensordict.tensordict import TensorDictBase +from tensordict import TensorDict, TensorDictBase +from tensordict.nn import make_functional from torch import nn from torchrl.envs.utils import step_mdp @@ -15,7 +15,8 @@ from torchrl.modules.tensordict_module.common import ensure_tensordict_compatible from .common import LossModule -from .utils import distance_loss, next_state_value +from .utils import distance_loss, next_state_value, DEFAULT_VALUE_FUN_PARAMS +from .value import ValueFunctionBase, TDLambdaEstimate class DQNLoss(LossModule): @@ -33,7 +34,7 @@ class DQNLoss(LossModule): def __init__( self, value_network: Union[QValueActor, nn.Module], - gamma: float, + value_function: Optional[ValueFunctionBase]=None, loss_function: str = "l2", priority_key: str = "td_error", delay_value: bool = False, @@ -41,10 +42,12 @@ def __init__( super().__init__() self.delay_value = delay_value - + if value_function is not None and value_function.value_network is not value_network: + raise RuntimeError("value_function.value_network and value_network must match.") value_network = ensure_tensordict_compatible( module=value_network, wrapper_type=QValueActor ) + self.value_function.value_key = "chosen_action_value" self.convert_to_functional( value_network, @@ -52,13 +55,30 @@ def __init__( create_target_params=self.delay_value, ) + make_functional(self.value_network) + + if value_function is None: + value_function = self._default_value_function() + self.value_function = value_function + self.value_network_in_keys = value_network.in_keys - self.register_buffer("gamma", torch.tensor(gamma)) self.loss_function = loss_function self.priority_key = priority_key self.action_space = self.value_network.action_space + def _default_value_function(self): + return TDLambdaEstimate(gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, + lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, + value_network=self.value_network, + average_rewards=True, + differentiable=False, + vectorized=True, + advantage_key="advantage", + value_target_key = "value_target", + value_key="chosen_action_value", + ) + def forward(self, input_tensordict: TensorDictBase) -> TensorDict: """Computes the DQN loss given a tensordict sampled from the replay buffer. 
@@ -106,14 +126,9 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: action = action.to(torch.float) pred_val_index = (pred_val * action).sum(-1) - with torch.no_grad(): - target_value = next_state_value( - tensordict, - self.value_network, - gamma=self.gamma, - params=self.target_value_network_params, - next_val_key="chosen_action_value", - ) + self.value_function(tensordict, self.value_network_parameters, self.target_value_network_parameters) + target_value = tensordict[self.value_function.value_target_key] + priority_tensor = (pred_val_index - target_value).pow(2) priority_tensor = priority_tensor.detach().unsqueeze(-1) if input_tensordict.device is not None: @@ -150,12 +165,14 @@ class DistributionalDQNLoss(LossModule): def __init__( self, value_network: Union[DistributionalQValueActor, nn.Module], - gamma: float, + value_function: ValueFunctionBase, priority_key: str = "td_error", delay_value: bool = False, ): super().__init__() - self.register_buffer("gamma", torch.tensor(gamma)) + self.value_function = value_function + if self.value_function.value_network is not value_network: + raise RuntimeError("value_function.value_network and value_network must match.") self.priority_key = priority_key self.delay_value = delay_value @@ -168,6 +185,9 @@ def __init__( "value_network", create_target_params=self.delay_value, ) + + make_functional(self.value_function.value_network) + self.action_space = self.value_network.action_space @staticmethod diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 5c30e7d2244..7c85075b19b 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -14,6 +14,9 @@ from torchrl.envs.utils import step_mdp +class DEFAULT_VALUE_FUN_PARAMS: + gamma: 0.99 + lmbda: 0.95 class _context_manager: def __init__(self, value=True): diff --git a/torchrl/objectives/value/__init__.py b/torchrl/objectives/value/__init__.py index 11e8f316f0b..6152732f411 100644 --- a/torchrl/objectives/value/__init__.py +++ b/torchrl/objectives/value/__init__.py @@ -3,4 +3,4 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from .advantages import GAE, TDEstimate, TDLambdaEstimate +from .advantages import GAE, TDEstimate, TDLambdaEstimate, ValueFunctionBase diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index d2e9b05dcf2..6ed0188502e 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - +import abc from functools import wraps from typing import List, Optional, Tuple, Union @@ -30,8 +30,41 @@ def new_fun(self, *args, **kwargs): return new_fun +class ValueFunctionBase(nn.Module): + """An abstract parent class for value function modules.""" + + value_network: TensorDictModule + value_key: Union[Tuple[str], str] + + @abc.abstractmethod + def forward( + self, + tensordict: TensorDictBase, + params: Optional[TensorDictBase] = None, + target_params: Optional[TensorDictBase] = None, + ) -> TensorDictBase: + """Computes the a value estimate given the data in tensordict. + + If a functional module is provided, a nested TensorDict containing the parameters + (and if relevant the target parameters) can be passed to the module. 
+ + Args: + tensordict (TensorDictBase): A TensorDict containing the data + (an observation key, "action", ("next", "reward"), ("next", "done") and "next" tensordict state + as returned by the environment) necessary to compute the value estimates and the TDEstimate. + The data passed to this module should be structured as :obj:`[*B, T, F]` where :obj:`B` are + the batch size, :obj:`T` the time dimension and :obj:`F` the feature dimension(s). + params (TensorDictBase, optional): A nested TensorDict containing the params + to be passed to the functional value network module. + target_params (TensorDictBase, optional): A nested TensorDict containing the + target params to be passed to the functional value network module. + + Returns: + An updated TensorDict with an advantage and a value_error keys as defined in the constructor. + """ + raise NotImplementedError -class TDEstimate(nn.Module): +class TDEstimate(ValueFunctionBase): """Temporal Difference estimate of advantage function. Args: @@ -198,7 +231,7 @@ def forward( return tensordict -class TDLambdaEstimate(nn.Module): +class TDLambdaEstimate(ValueFunctionBase): """TD-Lambda estimate of advantage function. Args: @@ -384,7 +417,7 @@ def forward( return tensordict -class GAE(nn.Module): +class GAE(ValueFunctionBase): """A class wrapper around the generalized advantage estimate functional. Refer to "HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION" From e8bf4c1395c565acb3de051a43f28c58f167578f Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 24 Mar 2023 11:07:02 +0000 Subject: [PATCH 13/89] amend --- test/test_cost.py | 6 ++-- torchrl/objectives/common.py | 3 +- torchrl/objectives/dqn.py | 50 ++++++++++++++++---------- torchrl/objectives/utils.py | 6 ++-- torchrl/objectives/value/advantages.py | 2 ++ 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/test/test_cost.py b/test/test_cost.py index ccb6a798d6c..2359c34f6df 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -109,7 +109,7 @@ def __enter__(self): pass def __exit__(self, exc_type, exc_val, exc_tb): - assert (self.td.select(*self.td_clone.keys()) == self.td_clone).all() + assert (self.td.select(*self.td_clone.keys()) == self.td_clone).all(), "Some keys have been modified in the tensordict!" def get_devices(): @@ -301,7 +301,7 @@ def test_dqn(self, delay_value, device, action_spec_type): td = self._create_mock_data_dqn( action_spec_type=action_spec_type, device=device ) - loss_fn = DQNLoss(actor, gamma=0.9, loss_function="l2", delay_value=delay_value) + loss_fn = DQNLoss(actor, loss_function="l2", delay_value=delay_value) with _check_td_steady(td): loss = loss_fn(td) assert loss_fn.priority_key in td.keys() @@ -341,7 +341,7 @@ def test_dqn_batcher(self, n, delay_value, device, action_spec_type, gamma=0.9): action_spec_type=action_spec_type, device=device ) loss_fn = DQNLoss( - actor, gamma=gamma, loss_function="l2", delay_value=delay_value + actor, loss_function="l2", delay_value=delay_value ) ms = MultiStep(gamma=gamma, n_steps=n).to(device) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index d3be7b75c3e..134da3b1e34 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -79,8 +79,7 @@ def convert_to_functional( compare_against: Optional[List[Parameter]] = None, funs_to_decorate=None, ) -> None: - """Converts a module to functional to be used in the loss. 
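The ``ValueFunctionBase`` interface introduced above only fixes the ``forward(tensordict, params=None, target_params=None)`` contract together with the ``value_network`` and ``value_key`` attributes. As a purely illustrative sketch (not part of the patch; the ``ZeroAdvantage`` name and its trivial target are invented for the example), a custom estimator can be as small as:

    # Illustrative only: a trivial estimator written against the
    # ValueFunctionBase contract. It writes a zero advantage and reuses the
    # one-step reward as the value target.
    from typing import Optional

    import torch
    from tensordict.nn import TensorDictModule
    from tensordict.tensordict import TensorDictBase
    from torchrl.objectives.value import ValueFunctionBase

    class ZeroAdvantage(ValueFunctionBase):
        def __init__(self, value_network: TensorDictModule, value_key: str = "state_value"):
            super().__init__()
            self.value_network = value_network
            self.value_key = value_key

        def forward(
            self,
            tensordict: TensorDictBase,
            params: Optional[TensorDictBase] = None,
            target_params: Optional[TensorDictBase] = None,
        ) -> TensorDictBase:
            reward = tensordict.get(("next", "reward"))
            tensordict.set("advantage", torch.zeros_like(reward))
            tensordict.set("value_target", reward)
            return tensordict
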
- """ + """Converts a module to functional to be used in the loss.""" if funs_to_decorate is None: funs_to_decorate = ["forward"] # To make it robust to device casting, we must register list of diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 05ec29fe492..1ffe6d7c4eb 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Union, Optional +from typing import Optional, Union import torch from tensordict import TensorDict, TensorDictBase @@ -15,8 +15,8 @@ from torchrl.modules.tensordict_module.common import ensure_tensordict_compatible from .common import LossModule -from .utils import distance_loss, next_state_value, DEFAULT_VALUE_FUN_PARAMS -from .value import ValueFunctionBase, TDLambdaEstimate +from .utils import DEFAULT_VALUE_FUN_PARAMS, distance_loss, next_state_value +from .value import TDLambdaEstimate, ValueFunctionBase class DQNLoss(LossModule): @@ -34,7 +34,7 @@ class DQNLoss(LossModule): def __init__( self, value_network: Union[QValueActor, nn.Module], - value_function: Optional[ValueFunctionBase]=None, + value_function: Optional[ValueFunctionBase] = None, loss_function: str = "l2", priority_key: str = "td_error", delay_value: bool = False, @@ -42,12 +42,16 @@ def __init__( super().__init__() self.delay_value = delay_value - if value_function is not None and value_function.value_network is not value_network: - raise RuntimeError("value_function.value_network and value_network must match.") + if ( + value_function is not None + and value_function.value_network is not value_network + ): + raise RuntimeError( + "value_function.value_network and value_network must match." 
+ ) value_network = ensure_tensordict_compatible( module=value_network, wrapper_type=QValueActor ) - self.value_function.value_key = "chosen_action_value" self.convert_to_functional( value_network, @@ -59,6 +63,8 @@ def __init__( if value_function is None: value_function = self._default_value_function() + else: + value_function.value_key = "chosen_action_value" self.value_function = value_function self.value_network_in_keys = value_network.in_keys @@ -68,15 +74,16 @@ def __init__( self.action_space = self.value_network.action_space def _default_value_function(self): - return TDLambdaEstimate(gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, - lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, - value_network=self.value_network, - average_rewards=True, - differentiable=False, - vectorized=True, - advantage_key="advantage", - value_target_key = "value_target", - value_key="chosen_action_value", + return TDLambdaEstimate( + gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, + lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, + value_network=self.value_network, + average_rewards=True, + differentiable=False, + vectorized=True, + advantage_key="advantage", + value_target_key="value_target", + value_key="chosen_action_value", ) def forward(self, input_tensordict: TensorDictBase) -> TensorDict: @@ -126,8 +133,11 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: action = action.to(torch.float) pred_val_index = (pred_val * action).sum(-1) - self.value_function(tensordict, self.value_network_parameters, self.target_value_network_parameters) - target_value = tensordict[self.value_function.value_target_key] + target_value = self.value_function( + tensordict.clone(False), + self.value_network_params, + self.target_value_network_params, + ).get(self.value_function.value_target_key).squeeze(-1) priority_tensor = (pred_val_index - target_value).pow(2) priority_tensor = priority_tensor.detach().unsqueeze(-1) @@ -172,7 +182,9 @@ def __init__( super().__init__() self.value_function = value_function if self.value_function.value_network is not value_network: - raise RuntimeError("value_function.value_network and value_network must match.") + raise RuntimeError( + "value_function.value_network and value_network must match." + ) self.priority_key = priority_key self.delay_value = delay_value diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 7c85075b19b..e18b6c841c4 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -14,9 +14,11 @@ from torchrl.envs.utils import step_mdp + class DEFAULT_VALUE_FUN_PARAMS: - gamma: 0.99 - lmbda: 0.95 + gamma = 0.99 + lmbda = 0.95 + class _context_manager: def __init__(self, value=True): diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 6ed0188502e..7f91bc349c3 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -30,6 +30,7 @@ def new_fun(self, *args, **kwargs): return new_fun + class ValueFunctionBase(nn.Module): """An abstract parent class for value function modules.""" @@ -64,6 +65,7 @@ def forward( """ raise NotImplementedError + class TDEstimate(ValueFunctionBase): """Temporal Difference estimate of advantage function. 
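With the fixes above (a default estimator when none is given, and explicit estimators accepted as long as they wrap the very same value network), the DQNLoss construction pattern these patches converge to can be sketched as follows. The ``QValueActor`` built from a toy linear head, the observation size and the hyperparameter values are assumptions for illustration, and intermediate commits may still shift details (the extra ``make_functional`` call, for instance, is dropped two patches later):

    # Hypothetical usage sketch of the reworked DQNLoss API.
    from torch import nn
    from torchrl.modules import QValueActor
    from torchrl.objectives import DQNLoss
    from torchrl.objectives.value import TDLambdaEstimate

    actor = QValueActor(nn.Linear(4, 3))  # assumed toy Q-network over 3 actions

    # default path: the loss builds a TD(lambda) estimator internally
    loss = DQNLoss(actor, loss_function="l2", delay_value=True)

    # explicit path: the estimator must wrap the same value network object
    td_lambda = TDLambdaEstimate(
        gamma=0.99,
        lmbda=0.95,
        value_network=actor,
        value_key="chosen_action_value",
    )
    loss = DQNLoss(actor, value_function=td_lambda, loss_function="l2")
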
From d178f93c391bf9227d2494eb4cb4141cd37f9e1c Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 24 Mar 2023 13:00:31 +0000 Subject: [PATCH 14/89] value_estimate and sac init --- torchrl/objectives/dqn.py | 23 +- torchrl/objectives/sac.py | 308 +++---------------------- torchrl/objectives/value/advantages.py | 35 ++- 3 files changed, 72 insertions(+), 294 deletions(-) diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 1ffe6d7c4eb..57329cb0404 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -24,7 +24,8 @@ class DQNLoss(LossModule): Args: value_network (QValueActor or nn.Module): a Q value operator. - gamma (scalar): a discount factor for return computation. + value_function (ValueFunctionBase, optional): the value function module + to be used. Defaults to :class:`torchrl.objectives.values.TDLambdaEstimate`. loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". delay_value (bool, optional): whether to duplicate the value network into a new target value network to create a double DQN. Default is :obj:`False`. @@ -59,8 +60,6 @@ def __init__( create_target_params=self.delay_value, ) - make_functional(self.value_network) - if value_function is None: value_function = self._default_value_function() else: @@ -169,22 +168,23 @@ class DistributionalDQNLoss(LossModule): value_network (DistributionalQValueActor or nn.Module): the distributional Q value operator. gamma (scalar): a discount factor for return computation. - delay_value (bool): whether to duplicate the value network into a new target value network to create double DQN + .. note:: + Unlike :class:`DQNLoss`, this class does not currently support + custom value functions. The next value estimation is not + bootstrapped. + delay_value (bool): whether to duplicate the value network into a new + target value network to create double DQN """ def __init__( self, value_network: Union[DistributionalQValueActor, nn.Module], - value_function: ValueFunctionBase, + gamma: float, priority_key: str = "td_error", delay_value: bool = False, ): super().__init__() - self.value_function = value_function - if self.value_function.value_network is not value_network: - raise RuntimeError( - "value_function.value_network and value_network must match." - ) + self.register_buffer("gamma", torch.tensor(gamma)) self.priority_key = priority_key self.delay_value = delay_value @@ -197,9 +197,6 @@ def __init__( "value_network", create_target_params=self.delay_value, ) - - make_functional(self.value_function.value_network) - self.action_space = self.value_network.action_space @staticmethod diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 48ca586245e..f5318957d1a 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -15,7 +15,9 @@ from torchrl.modules import ProbabilisticActor from torchrl.modules.tensordict_module.actors import ActorCriticWrapper -from torchrl.objectives.utils import distance_loss, next_state_value +from torchrl.objectives.utils import distance_loss, next_state_value, \ + DEFAULT_VALUE_FUN_PARAMS +from .value import ValueFunctionBase, TDLambdaEstimate from ..envs.utils import set_exploration_mode, step_mdp from .common import LossModule @@ -39,14 +41,18 @@ class SACLoss(LossModule): Args: actor_network (ProbabilisticActor): stochastic actor - qvalue_network (TensorDictModule): Q(s, a) parametric model - value_network (TensorDictModule, optional): V(s) parametric model. 
If not - provided, the second version of SAC is assumed. - gamma (number, optional): discount for return computation - Default is 0.99 + qvalue_network (TensorDictModule): Q(s, a) parametric model. + This module typically outputs a ``"state_action_value"`` entry. + value_network (TensorDictModule, optional): V(s) parametric model. + This module typically outputs a ``"state_value"`` entry. + .. note:: + If not provided, the second version of SAC is assumed, where + only the Q-Value network is needed. + value_function (ValueFunctionBase, optional): the value function module + to be used. Defaults to :class:`torchrl.objectives.values.TDLambdaEstimate`. priority_key (str, optional): tensordict key where to write the - priority (for prioritized replay buffer usage). Default is - `"td_error"`. + priority (for prioritized replay buffer usage). Defaults to + ``"td_error"``. loss_function (str, optional): loss function to be used with the value function loss. Default is `"smooth_l1"`. alpha_init (float, optional): initial entropy multiplier. @@ -78,8 +84,8 @@ def __init__( actor_network: ProbabilisticActor, qvalue_network: TensorDictModule, value_network: Optional[TensorDictModule] = None, + value_function: Optional[ValueFunctionBase] = None, num_qvalue_nets: int = 2, - gamma: Number = 0.99, priority_key: str = "td_error", loss_function: str = "smooth_l1", alpha_init: float = 1.0, @@ -132,7 +138,6 @@ def __init__( compare_against=list(actor_network.parameters()) + value_params, ) - self.register_buffer("gamma", torch.tensor(gamma)) self.priority_key = priority_key self.loss_function = loss_function try: @@ -174,6 +179,26 @@ def __init__( ) make_functional(self.actor_critic) + if value_function is None: + value_function = self._default_value_function() + else: + value_function.value_key = "chosen_action_value" + self.value_function = value_function + + + def _default_value_function(self): + return TDLambdaEstimate( + gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, + lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, + value_network=self.value_network if self._version == 1 else self.qvalue_network, + average_rewards=True, + differentiable=False, + vectorized=True, + advantage_key="advantage", + value_target_key="value_target", + value_key="state_action_value" if self._version == 2 else "state_value", + ) + @property def device(self) -> torch.device: for p in self.parameters(): @@ -409,266 +434,3 @@ def _alpha(self): with torch.no_grad(): alpha = self.log_alpha.exp() return alpha - - -class DiscreteSACLoss(LossModule): - """Discrete SAC Loss module. - - Args: - actor_network (ProbabilisticActor): the actor to be trained - qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. - num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. - gamma (Number, optional): gamma decay factor. Default is 0.99. - priotity_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is - `"td_error"`. - loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", - "l1", Default is "smooth_l1". - alpha_init (float, optional): initial entropy multiplier. - Default is 1.0. - min_alpha (float, optional): min value of alpha. - Default is 0.1. - max_alpha (float, optional): max value of alpha. - Default is 10.0. - fixed_alpha (bool, optional): whether alpha should be trained to match a target entropy. Default is :obj:`False`. 
- target_entropy_weight (float, optional): weight for the target entropy term. - target_entropy (Union[str, Number], optional): Target entropy for the stochastic policy. Default is "auto". - delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used - for data collection. Default is :obj:`False`. - """ - - delay_actor: bool = False - - def __init__( - self, - actor_network: ProbabilisticActor, - qvalue_network: TensorDictModule, - num_actions: int, - num_qvalue_nets: int = 2, - gamma: Number = 0.99, - priotity_key: str = "td_error", - loss_function: str = "smooth_l1", - alpha_init: float = 1.0, - min_alpha: float = 0.1, - max_alpha: float = 10.0, - fixed_alpha: bool = False, - target_entropy_weight: float = 0.98, - target_entropy: Union[str, Number] = "auto", - delay_qvalue: bool = True, - ): - if not _has_functorch: - raise ImportError("Failed to import functorch.") from FUNCTORCH_ERROR - super().__init__() - self.convert_to_functional( - actor_network, - "actor_network", - create_target_params=self.delay_actor, - funs_to_decorate=["forward", "get_dist_params"], - ) - - self.delay_qvalue = delay_qvalue - self.convert_to_functional( - qvalue_network, - "qvalue_network", - num_qvalue_nets, - create_target_params=self.delay_qvalue, - compare_against=list(actor_network.parameters()), - ) - self.num_qvalue_nets = num_qvalue_nets - self.register_buffer("gamma", torch.tensor(gamma)) - self.priority_key = priotity_key - self.loss_function = loss_function - - try: - device = next(self.parameters()).device - except AttributeError: - device = torch.device("cpu") - - self.register_buffer("alpha_init", torch.tensor(alpha_init, device=device)) - self.register_buffer( - "min_log_alpha", torch.tensor(min_alpha, device=device).log() - ) - self.register_buffer( - "max_log_alpha", torch.tensor(max_alpha, device=device).log() - ) - self.fixed_alpha = fixed_alpha - if fixed_alpha: - self.register_buffer( - "log_alpha", torch.tensor(math.log(alpha_init), device=device) - ) - else: - self.register_parameter( - "log_alpha", - torch.nn.Parameter(torch.tensor(math.log(alpha_init), device=device)), - ) - - if target_entropy == "auto": - target_entropy = -float(np.log(1.0 / num_actions) * target_entropy_weight) - self.register_buffer( - "target_entropy", torch.tensor(target_entropy, device=device) - ) - - @property - def alpha(self): - self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha) - with torch.no_grad(): - alpha = self.log_alpha.exp() - return alpha - - def forward(self, tensordict: TensorDictBase) -> TensorDictBase: - obs_keys = self.actor_network.in_keys - tensordict_select = tensordict.select("next", *obs_keys, "action") - - actor_params = torch.stack( - [self.actor_network_params, self.target_actor_network_params], 0 - ) - - tensordict_actor_grad = tensordict_select.select( - *obs_keys - ) # to avoid overwriting keys - next_td_actor = step_mdp(tensordict_select).select( - *self.actor_network.in_keys - ) # next_observation -> - tensordict_actor = torch.stack([tensordict_actor_grad, next_td_actor], 0) - tensordict_actor = tensordict_actor.contiguous() - - with set_exploration_mode("random"): - # vmap doesn't support sampling, so we take it out from the vmap - td_params = vmap(self.actor_network.get_dist_params)( - tensordict_actor, - actor_params, - ) - if isinstance(self.actor_network, ProbabilisticActor): - tensordict_actor_dist = self.actor_network.build_dist_from_params( - td_params - ) - else: - tensordict_actor_dist = 
self.actor_network.build_dist_from_params( - td_params - ) - probs = tensordict_actor_dist.probs - z = (probs == 0.0).float() * 1e-8 - logp_pi = torch.log(probs + z) - logp_pi_pol = torch.sum(probs * logp_pi, dim=-1, keepdim=True) - - # repeat tensordict_actor to match the qvalue size - _actor_loss_td = ( - tensordict_actor[0] - .select(*self.qvalue_network.in_keys) - .expand(self.num_qvalue_nets, *tensordict_actor[0].batch_size) - ) # for actor loss - _qval_td = tensordict_select.select(*self.qvalue_network.in_keys).expand( - self.num_qvalue_nets, - *tensordict_select.select(*self.qvalue_network.in_keys).batch_size, - ) # for qvalue loss - _next_val_td = ( - tensordict_actor[1] - .select(*self.qvalue_network.in_keys) - .expand(self.num_qvalue_nets, *tensordict_actor[1].batch_size) - ) # for next value estimation - tensordict_qval = torch.cat( - [ - _actor_loss_td, - _next_val_td, - _qval_td, - ], - 0, - ) - - # cat params - q_params_detach = self.qvalue_network_params.detach() - qvalue_params = torch.cat( - [ - q_params_detach, - self.target_qvalue_network_params, - self.qvalue_network_params, - ], - 0, - ) - tensordict_qval = vmap(self.qvalue_network)( - tensordict_qval, - qvalue_params, - ) - - state_action_value = tensordict_qval.get("state_value").squeeze(-1) - ( - state_action_value_actor, - next_state_action_value_qvalue, - state_action_value_qvalue, - ) = state_action_value.split( - [self.num_qvalue_nets, self.num_qvalue_nets, self.num_qvalue_nets], - dim=0, - ) - - loss_actor = -( - (state_action_value_actor.min(0)[0] * probs[0]).sum(-1, keepdim=True) - - self.alpha * logp_pi_pol[0] - ).mean() - - pred_next_val = ( - probs[1] - * (next_state_action_value_qvalue.min(0)[0] - self.alpha * logp_pi[1]) - ).sum(dim=-1, keepdim=True) - - target_value = next_state_value( - tensordict, - gamma=self.gamma, - pred_next_val=pred_next_val, - ) - - actions = torch.argmax(tensordict_select["action"], dim=-1) - - pred_val_1 = ( - state_action_value_qvalue[0].gather(-1, actions.unsqueeze(-1)).unsqueeze(0) - ) - pred_val_2 = ( - state_action_value_qvalue[1].gather(-1, actions.unsqueeze(-1)).unsqueeze(0) - ) - pred_val = torch.cat([pred_val_1, pred_val_2], dim=0).squeeze() - td_error = (pred_val - target_value.expand_as(pred_val)).pow(2) - loss_qval = ( - distance_loss( - pred_val, - target_value.expand_as(pred_val), - loss_function=self.loss_function, - ) - .mean(-1) - .sum() - * 0.5 - ) - - tensordict.set("td_error", td_error.detach().max(0)[0]) - - loss_alpha = self._loss_alpha(logp_pi_pol) - if not loss_qval.shape == loss_actor.shape: - raise RuntimeError( - f"QVal and actor loss have different shape: {loss_qval.shape} and {loss_actor.shape}" - ) - td_out = TensorDict( - { - "loss_actor": loss_actor.mean(), - "loss_qvalue": loss_qval.mean(), - "loss_alpha": loss_alpha.mean(), - "alpha": self.alpha.detach(), - "entropy": -logp_pi.mean().detach(), - "state_action_value_actor": state_action_value_actor.mean().detach(), - "action_log_prob_actor": logp_pi.mean().detach(), - "next.state_value": pred_next_val.mean().detach(), - "target_value": target_value.mean().detach(), - }, - [], - ) - - return td_out - - def _loss_alpha(self, log_pi: Tensor) -> Tensor: - if torch.is_grad_enabled() and not log_pi.requires_grad: - raise RuntimeError( - "expected log_pi to require gradient for the alpha loss)" - ) - if self.target_entropy is not None: - # we can compute this loss even if log_alpha is not a parameter - alpha_loss = -self.log_alpha.exp() * (log_pi.detach() + self.target_entropy) - else: - # placeholder 
- alpha_loss = torch.zeros_like(log_pi) - return alpha_loss diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 7f91bc349c3..65a0f81c7c8 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -65,6 +65,20 @@ def forward( """ raise NotImplementedError + def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): + """Gets a value estimate, usually used as a target value for the value network. + + Args: + tensordict (TensorDictBase): the tensordict containing the data to + read. + requires_grad (bool, optional): whether the estimate should be part + of a computational graph. + Defaults to ``False``. + target_params (TensorDictBase, optional): A nested TensorDict containing the + target params to be passed to the functional value network module. + + """ + raise NotImplementedError class TDEstimate(ValueFunctionBase): """Temporal Difference estimate of advantage function. @@ -202,7 +216,6 @@ def forward( ("next", "reward"), reward ) # we must update the rewards if they are used later in the code - gamma = self.gamma kwargs = {} if self.is_functional and params is None: raise RuntimeError( @@ -214,24 +227,30 @@ def forward( self.value_network(tensordict, **kwargs) value = tensordict.get(self.value_key) + if params is not None and target_params is None: + target_params = params.detach() + value_target = self.value_estimate(tensordict, target_params=target_params) + tensordict.set("advantage", value_target - value) + tensordict.set("value_target", value_target) + return tensordict + + def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): + kwargs = {} + gamma = self.gamma # we may still need to pass gradient, but we don't want to assign grads to # value net params + reward = tensordict.get(("next", "reward")) step_td = step_mdp(tensordict) if target_params is not None: # we assume that target parameters are not differentiable kwargs["params"] = target_params - elif "params" in kwargs: - kwargs["params"] = kwargs["params"].detach() with hold_out_net(self.value_network): self.value_network(step_td, **kwargs) next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) - adv = td_advantage_estimate(gamma, value, next_value, reward, done) - tensordict.set("advantage", adv) - tensordict.set("value_target", adv + value) - return tensordict - + value_target = td_advantage_estimate(gamma, torch.zeros_like(next_value), next_value, reward, done) + return value_target class TDLambdaEstimate(ValueFunctionBase): """TD-Lambda estimate of advantage function. 
From efb57a85cf6ff851b2d9beb54501ed732b1fefca Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 24 Mar 2023 14:08:07 +0000 Subject: [PATCH 15/89] temp --- torchrl/objectives/value/advantages.py | 52 ++++++++++++++------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 65a0f81c7c8..d339ebf8c3b 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -208,13 +208,6 @@ def forward( "Expected input tensordict to have at least one dimensions, got" f"tensordict.batch_size = {tensordict.batch_size}" ) - reward = tensordict.get(("next", "reward")) - if self.average_rewards: - reward = reward - reward.mean() - reward = reward / reward.std().clamp_min(1e-4) - tensordict.set( - ("next", "reward"), reward - ) # we must update the rewards if they are used later in the code kwargs = {} if self.is_functional and params is None: @@ -240,6 +233,12 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona # we may still need to pass gradient, but we don't want to assign grads to # value net params reward = tensordict.get(("next", "reward")) + if self.average_rewards: + reward = reward - reward.mean() + reward = reward / reward.std().clamp_min(1e-4) + tensordict.set( + ("next", "reward"), reward + ) # we must update the rewards if they are used later in the code step_td = step_mdp(tensordict) if target_params is not None: # we assume that target parameters are not differentiable @@ -389,17 +388,6 @@ def forward( "Expected input tensordict to have at least one dimensions, got" f"tensordict.batch_size = {tensordict.batch_size}" ) - reward = tensordict.get(("next", "reward")) - if self.average_rewards: - reward = reward - reward.mean() - reward = reward / reward.std().clamp_min(1e-4) - tensordict.set( - ("next", "reward"), reward - ) # we must update the rewards if they are used later in the code - - gamma = self.gamma - lmbda = self.lmbda - kwargs = {} if self.is_functional and params is None: raise RuntimeError( @@ -410,13 +398,33 @@ def forward( with hold_out_net(self.value_network): self.value_network(tensordict, **kwargs) value = tensordict.get(self.value_key) + if params is not None and target_params is None: + target_params = params.detach() + value_target = self.value_estimate(tensordict, target_params=target_params) + + tensordict.set(self.advantage_key, value_target-value) + tensordict.set(self.value_target_key, value_target) + return tensordict + + def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): + + gamma = self.gamma + lmbda = self.lmbda + reward = tensordict.get(("next", "reward")) + if self.average_rewards: + reward = reward - reward.mean() + reward = reward / reward.std().clamp_min(1e-4) + tensordict.set( + ("next", "reward"), reward + ) # we must update the rewards if they are used later in the code + + + kwargs = {} step_td = step_mdp(tensordict) if target_params is not None: # we assume that target parameters are not differentiable kwargs["params"] = target_params - elif "params" in kwargs: - kwargs["params"] = kwargs["params"].detach() with hold_out_net(self.value_network): # we may still need to pass gradient, but we don't want to assign grads to # value net params @@ -433,10 +441,6 @@ def forward( gamma, lmbda, value, next_value, reward, done ) - tensordict.set(self.advantage_key, adv) - tensordict.set(self.value_target_key, adv + value) - return tensordict - 
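The split between ``forward``, which writes ``"advantage"`` and ``"value_target"`` into the tensordict, and ``value_estimate``, which only returns the bootstrapped target, can be exercised with a toy value network. The sketch below mirrors the doctest added to these estimators later in the series (at this point the class is still named ``TDEstimate``; it is split into ``TD0Estimate`` and ``TD1Estimate`` two patches later). The ``nn.Linear(3, 1)`` head and the ``[1, 10]`` batch/time shape are illustrative assumptions:

    # Illustrative sketch of the forward / value_estimate split, using the
    # stateful (non-functional) path so that no params need to be passed.
    import torch
    from tensordict import TensorDict
    from tensordict.nn import TensorDictModule
    from torch import nn
    from torchrl.objectives.value import TDEstimate

    value_net = TensorDictModule(
        nn.Linear(3, 1), in_keys=["obs"], out_keys=["state_value"]
    )
    estimator = TDEstimate(gamma=0.99, value_network=value_net, differentiable=False)

    obs, next_obs = torch.randn(2, 1, 10, 3)
    reward = torch.randn(1, 10, 1)
    done = torch.zeros(1, 10, 1, dtype=torch.bool)
    data = TensorDict(
        {"obs": obs, "next": {"obs": next_obs, "reward": reward, "done": done}},
        batch_size=[1, 10],
    )

    estimator(data)  # writes "advantage" and "value_target" in place
    target = estimator.value_estimate(data)  # reward + gamma * (1 - done) * V(next obs)
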
class GAE(ValueFunctionBase): """A class wrapper around the generalized advantage estimate functional. From 48c227ae6fa3fc306b959e41471d932e9386d563 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 27 Mar 2023 07:51:26 +0100 Subject: [PATCH 16/89] tmp --- torchrl/objectives/common.py | 40 ++++ torchrl/objectives/dqn.py | 32 ++- torchrl/objectives/sac.py | 300 +++++++++++++++++++++++-- torchrl/objectives/utils.py | 34 ++- torchrl/objectives/value/advantages.py | 217 +++++++++++++++--- 5 files changed, 568 insertions(+), 55 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 134da3b1e34..c53c9214de1 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -18,6 +18,7 @@ from torch.nn import Parameter from torchrl.modules.utils import Buffer +from torchrl.objectives.utils import ValueFunctions from torchrl.objectives.value import ValueFunctionBase _has_functorch = False @@ -362,3 +363,42 @@ def _default_value_function(self) -> ValueFunctionBase: """ raise NotImplementedError + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + """Value-function constructor. + + If the non-default value function is wanted, it must be built using + this method. + + Args: + value_type (ValueFunctions): A :class:`torchrl.objectives.utils.ValueFunctions` + enum type indicating the value function to use. + **hyperparams: hyperparameters to use for the value function. + If not provided, the value indicated by + :func:`torchrl.objectives.utils.default_value_kwargs` will be + used. + + Examples: + >>> # initialize the DQN loss + >>> dqn_loss = DQNLoss(actor) + >>> dqn_loss.make_value_function( + ... ValueFunctions.TD1, + ... gamma=0.9) + + """ + if value_type == ValueFunctions.TD1: + raise NotImplementedError(f"Value type {value_type} it not implemented for loss {type(self)}.") + elif value_type == ValueFunctions.TD0: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueFunctions.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueFunctions.TDLambda: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." 
+ ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 57329cb0404..41a6887a858 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -7,7 +7,6 @@ import torch from tensordict import TensorDict, TensorDictBase -from tensordict.nn import make_functional from torch import nn from torchrl.envs.utils import step_mdp @@ -15,7 +14,7 @@ from torchrl.modules.tensordict_module.common import ensure_tensordict_compatible from .common import LossModule -from .utils import DEFAULT_VALUE_FUN_PARAMS, distance_loss, next_state_value +from .utils import distance_loss, ValueFunctions, default_value_kwargs from .value import TDLambdaEstimate, ValueFunctionBase @@ -72,6 +71,35 @@ def __init__( self.priority_key = priority_key self.action_space = self.value_network.action_space + def make_value_function( + self, + value_type: ValueFunctions, + **hyperparams + ): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + if value_type == ValueFunctions.TD1: + raise NotImplementedError(f"Value type {value_type} it not implemented for loss {type(self)}.") + elif value_type == ValueFunctions.TD0: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueFunctions.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueFunctions.TDLambda: + return TDLambdaEstimate( + **hp, + value_network=self.value_network, + advantage_key="advantage", + value_target_key="value_target", + value_key="chosen_action_value", + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") + + def _default_value_function(self): return TDLambdaEstimate( gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index f5318957d1a..424e2f28bfb 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -15,8 +15,7 @@ from torchrl.modules import ProbabilisticActor from torchrl.modules.tensordict_module.actors import ActorCriticWrapper -from torchrl.objectives.utils import distance_loss, next_state_value, \ - DEFAULT_VALUE_FUN_PARAMS +from torchrl.objectives.utils import distance_loss, next_state_value from .value import ValueFunctionBase, TDLambdaEstimate from ..envs.utils import set_exploration_mode, step_mdp @@ -181,8 +180,13 @@ def __init__( if value_function is None: value_function = self._default_value_function() + elif self._version == 1: + # in v1, the next value requires an action to be sampled + value_function.value_network = self.actor_critic else: - value_function.value_key = "chosen_action_value" + # TODO + pass + self.value_function = value_function @@ -190,7 +194,7 @@ def _default_value_function(self): return TDLambdaEstimate( gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, - value_network=self.value_network if self._version == 1 else self.qvalue_network, + value_network=self.actor_critic if self._version == 1 else self.qvalue_network, average_rewards=True, differentiable=False, vectorized=True, @@ -244,10 +248,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: } if self._version == 1: out["loss_value"] = loss_value.mean() - return TensorDict( - out, - [], - ) + return TensorDict(out,[]) def _loss_actor(self, tensordict: TensorDictBase) -> Tensor: # KL lossa @@ -278,8 +279,7 @@ def _loss_actor(self, tensordict: TensorDictBase) -> 
Tensor: return self._alpha * log_prob - min_q_logprob def _loss_qvalue_v1(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: - actor_critic = self.actor_critic - params = TensorDict( + target_params = TensorDict( { "module": { "0": self.target_actor_network_params, @@ -290,19 +290,16 @@ def _loss_qvalue_v1(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: _run_checks=False, ) with set_exploration_mode("mode"): - target_value = next_state_value( + target_value = self.value_function.value_estimate( tensordict, - actor_critic, - gamma=self.gamma, - next_val_key="state_value", - params=params, + target_params=target_params ) # value loss qvalue_network = self.qvalue_network - # Q-nets must be trained independently: as such, we split the data in 2 if required and train each q-net on - # one half of the data. + # Q-nets must be trained independently: as such, we split the data in 2 + # if required and train each q-net on one half of the data. shape = tensordict.shape if shape[0] % self.num_qvalue_nets != 0: raise RuntimeError( @@ -341,8 +338,8 @@ def _loss_qvalue_v2(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: next_td, params=self.target_actor_network_params, ) - next_td["action"] = dist.rsample() - next_td["sample_log_prob"] = dist.log_prob(next_td["action"]) + next_td.set("action", dist.rsample()) + next_td.set("sample_log_prob", dist.log_prob(next_td["action"])) sample_log_prob = next_td.get("sample_log_prob") # get q-values next_td = vmap(self.qvalue_network, (None, 0))( @@ -356,7 +353,7 @@ def _loss_qvalue_v2(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: ): sample_log_prob = sample_log_prob.unsqueeze(-1) state_value = ( - next_td.get("state_action_value") - self._alpha * sample_log_prob + state_action_value - self._alpha * sample_log_prob ) state_value = state_value.min(0)[0] @@ -434,3 +431,266 @@ def _alpha(self): with torch.no_grad(): alpha = self.log_alpha.exp() return alpha + + +class DiscreteSACLoss(LossModule): + """Discrete SAC Loss module. + + Args: + actor_network (ProbabilisticActor): the actor to be trained + qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. + num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. + gamma (Number, optional): gamma decay factor. Default is 0.99. + priotity_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is + `"td_error"`. + loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", + "l1", Default is "smooth_l1". + alpha_init (float, optional): initial entropy multiplier. + Default is 1.0. + min_alpha (float, optional): min value of alpha. + Default is 0.1. + max_alpha (float, optional): max value of alpha. + Default is 10.0. + fixed_alpha (bool, optional): whether alpha should be trained to match a target entropy. Default is :obj:`False`. + target_entropy_weight (float, optional): weight for the target entropy term. + target_entropy (Union[str, Number], optional): Target entropy for the stochastic policy. Default is "auto". + delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used + for data collection. Default is :obj:`False`. 
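In the v1 branch above, the Q-value target is now obtained through ``value_function.value_estimate(...)`` rather than ``next_state_value(...)``. For a TD(0)-style estimator this reduces to the one-step bootstrapped target ``r + gamma * (1 - done) * V(s')``, while TD(lambda) blends such targets along the trajectory. A standalone numeric check of the one-step case, with arbitrary toy values:

    # Toy check of the one-step bootstrapped target; numbers are arbitrary.
    import torch

    gamma = 0.99
    reward = torch.tensor([[1.0], [0.5]])
    done = torch.tensor([[False], [True]])
    next_value = torch.tensor([[2.0], [3.0]])

    target = reward + gamma * (1 - done.float()) * next_value
    assert torch.allclose(target, torch.tensor([[2.98], [0.50]]))
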
+ """ + + delay_actor: bool = False + + def __init__( + self, + actor_network: ProbabilisticActor, + qvalue_network: TensorDictModule, + num_actions: int, + num_qvalue_nets: int = 2, + gamma: Number = 0.99, + priotity_key: str = "td_error", + loss_function: str = "smooth_l1", + alpha_init: float = 1.0, + min_alpha: float = 0.1, + max_alpha: float = 10.0, + fixed_alpha: bool = False, + target_entropy_weight: float = 0.98, + target_entropy: Union[str, Number] = "auto", + delay_qvalue: bool = True, + ): + if not _has_functorch: + raise ImportError("Failed to import functorch.") from FUNCTORCH_ERROR + super().__init__() + self.convert_to_functional( + actor_network, + "actor_network", + create_target_params=self.delay_actor, + funs_to_decorate=["forward", "get_dist_params"], + ) + + self.delay_qvalue = delay_qvalue + self.convert_to_functional( + qvalue_network, + "qvalue_network", + num_qvalue_nets, + create_target_params=self.delay_qvalue, + compare_against=list(actor_network.parameters()), + ) + self.num_qvalue_nets = num_qvalue_nets + self.register_buffer("gamma", torch.tensor(gamma)) + self.priority_key = priotity_key + self.loss_function = loss_function + + try: + device = next(self.parameters()).device + except AttributeError: + device = torch.device("cpu") + + self.register_buffer("alpha_init", torch.tensor(alpha_init, device=device)) + self.register_buffer( + "min_log_alpha", torch.tensor(min_alpha, device=device).log() + ) + self.register_buffer( + "max_log_alpha", torch.tensor(max_alpha, device=device).log() + ) + self.fixed_alpha = fixed_alpha + if fixed_alpha: + self.register_buffer( + "log_alpha", torch.tensor(math.log(alpha_init), device=device) + ) + else: + self.register_parameter( + "log_alpha", + torch.nn.Parameter(torch.tensor(math.log(alpha_init), device=device)), + ) + + if target_entropy == "auto": + target_entropy = -float(np.log(1.0 / num_actions) * target_entropy_weight) + self.register_buffer( + "target_entropy", torch.tensor(target_entropy, device=device) + ) + + @property + def alpha(self): + self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha) + with torch.no_grad(): + alpha = self.log_alpha.exp() + return alpha + + def forward(self, tensordict: TensorDictBase) -> TensorDictBase: + obs_keys = self.actor_network.in_keys + tensordict_select = tensordict.select("next", *obs_keys, "action") + + actor_params = torch.stack( + [self.actor_network_params, self.target_actor_network_params], 0 + ) + + tensordict_actor_grad = tensordict_select.select( + *obs_keys + ) # to avoid overwriting keys + next_td_actor = step_mdp(tensordict_select).select( + *self.actor_network.in_keys + ) # next_observation -> + tensordict_actor = torch.stack([tensordict_actor_grad, next_td_actor], 0) + tensordict_actor = tensordict_actor.contiguous() + + with set_exploration_mode("random"): + # vmap doesn't support sampling, so we take it out from the vmap + td_params = vmap(self.actor_network.get_dist_params)( + tensordict_actor, + actor_params, + ) + if isinstance(self.actor_network, ProbabilisticActor): + tensordict_actor_dist = self.actor_network.build_dist_from_params( + td_params + ) + else: + tensordict_actor_dist = self.actor_network.build_dist_from_params( + td_params + ) + probs = tensordict_actor_dist.probs + z = (probs == 0.0).float() * 1e-8 + logp_pi = torch.log(probs + z) + logp_pi_pol = torch.sum(probs * logp_pi, dim=-1, keepdim=True) + + # repeat tensordict_actor to match the qvalue size + _actor_loss_td = ( + tensordict_actor[0] + .select(*self.qvalue_network.in_keys) 
+ .expand(self.num_qvalue_nets, *tensordict_actor[0].batch_size) + ) # for actor loss + _qval_td = tensordict_select.select(*self.qvalue_network.in_keys).expand( + self.num_qvalue_nets, + *tensordict_select.select(*self.qvalue_network.in_keys).batch_size, + ) # for qvalue loss + _next_val_td = ( + tensordict_actor[1] + .select(*self.qvalue_network.in_keys) + .expand(self.num_qvalue_nets, *tensordict_actor[1].batch_size) + ) # for next value estimation + tensordict_qval = torch.cat( + [ + _actor_loss_td, + _next_val_td, + _qval_td, + ], + 0, + ) + + # cat params + q_params_detach = self.qvalue_network_params.detach() + qvalue_params = torch.cat( + [ + q_params_detach, + self.target_qvalue_network_params, + self.qvalue_network_params, + ], + 0, + ) + tensordict_qval = vmap(self.qvalue_network)( + tensordict_qval, + qvalue_params, + ) + + state_action_value = tensordict_qval.get("state_value").squeeze(-1) + ( + state_action_value_actor, + next_state_action_value_qvalue, + state_action_value_qvalue, + ) = state_action_value.split( + [self.num_qvalue_nets, self.num_qvalue_nets, self.num_qvalue_nets], + dim=0, + ) + + loss_actor = -( + (state_action_value_actor.min(0)[0] * probs[0]).sum(-1, keepdim=True) + - self.alpha * logp_pi_pol[0] + ).mean() + + pred_next_val = ( + probs[1] + * (next_state_action_value_qvalue.min(0)[0] - self.alpha * logp_pi[1]) + ).sum(dim=-1, keepdim=True) + + target_value = next_state_value( + tensordict, + gamma=self.gamma, + pred_next_val=pred_next_val, + ) + + actions = torch.argmax(tensordict_select["action"], dim=-1) + + pred_val_1 = ( + state_action_value_qvalue[0].gather(-1, actions.unsqueeze(-1)).unsqueeze(0) + ) + pred_val_2 = ( + state_action_value_qvalue[1].gather(-1, actions.unsqueeze(-1)).unsqueeze(0) + ) + pred_val = torch.cat([pred_val_1, pred_val_2], dim=0).squeeze() + td_error = (pred_val - target_value.expand_as(pred_val)).pow(2) + loss_qval = ( + distance_loss( + pred_val, + target_value.expand_as(pred_val), + loss_function=self.loss_function, + ) + .mean(-1) + .sum() + * 0.5 + ) + + tensordict.set("td_error", td_error.detach().max(0)[0]) + + loss_alpha = self._loss_alpha(logp_pi_pol) + if not loss_qval.shape == loss_actor.shape: + raise RuntimeError( + f"QVal and actor loss have different shape: {loss_qval.shape} and {loss_actor.shape}" + ) + td_out = TensorDict( + { + "loss_actor": loss_actor.mean(), + "loss_qvalue": loss_qval.mean(), + "loss_alpha": loss_alpha.mean(), + "alpha": self.alpha.detach(), + "entropy": -logp_pi.mean().detach(), + "state_action_value_actor": state_action_value_actor.mean().detach(), + "action_log_prob_actor": logp_pi.mean().detach(), + "next.state_value": pred_next_val.mean().detach(), + "target_value": target_value.mean().detach(), + }, + [], + ) + + return td_out + + def _loss_alpha(self, log_pi: Tensor) -> Tensor: + if torch.is_grad_enabled() and not log_pi.requires_grad: + raise RuntimeError( + "expected log_pi to require gradient for the alpha loss)" + ) + if self.target_entropy is not None: + # we can compute this loss even if log_alpha is not a parameter + alpha_loss = -self.log_alpha.exp() * (log_pi.detach() + self.target_entropy) + else: + # placeholder + alpha_loss = torch.zeros_like(log_pi) + return alpha_loss diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index e18b6c841c4..fc164430cb5 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. 
import functools +from enum import Enum from typing import Iterable, Optional, Union import torch @@ -15,9 +16,36 @@ from torchrl.envs.utils import step_mdp -class DEFAULT_VALUE_FUN_PARAMS: - gamma = 0.99 - lmbda = 0.95 +class ValueFunctions(Enum): + TD0 = 1 + TD1 = 2 + TDLambda = 3 + GAE = 4 + +def default_value_kwargs(value_type: ValueFunctions): + """Default value function keyword argument generator. + + Args: + value_type (Enum.value): the value function type, from the + :class:`torchrl.objectives.utils.ValueFunctions` class. + + Examples: + >>> kwargs = default_value_kwargs(ValueFunctions.TDLambda) + {"gamma": 0.99, "lmbda": 0.95} + + """ + if value_type == ValueFunctions.TD1: + return {"gamma": 0.99} + elif value_type == ValueFunctions.TD0: + return {"gamma": 0.99} + elif value_type == ValueFunctions.GAE: + return {"gamma": 0.99, "lmbda": 0.95} + elif value_type == ValueFunctions.TDLambda: + return {"gamma": 0.99, "lmbda": 0.95} + else: + raise NotImplementedError(f"Unknown value type {value_type}.") + + class _context_manager: diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index d339ebf8c3b..7465206ce78 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -7,7 +7,7 @@ from typing import List, Optional, Tuple, Union import torch -from tensordict.nn import dispatch, TensorDictModule +from tensordict.nn import dispatch, TensorDictModule, is_functional from tensordict.tensordict import TensorDictBase from torch import nn, Tensor @@ -65,7 +65,7 @@ def forward( """ raise NotImplementedError - def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): + def value_estimate(self, tensordict, requires_grad=True, target_params: Optional[TensorDictBase] = None): """Gets a value estimate, usually used as a target value for the value network. Args: @@ -73,15 +73,29 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona read. requires_grad (bool, optional): whether the estimate should be part of a computational graph. - Defaults to ``False``. + .. note:: + To avoid carrying gradient with respect to the parameters, + one can also use ``val_fun.value_estimate(tensordict, target_params=params.detach())`` + which allows gradients to pass through the value function + without including the parameters in the computational graph. + + Defaults to ``True``. target_params (TensorDictBase, optional): A nested TensorDict containing the target params to be passed to the functional value network module. """ raise NotImplementedError -class TDEstimate(ValueFunctionBase): - """Temporal Difference estimate of advantage function. + @property + def is_functional(self): + if isinstance(self.value_network, nn.Module): + return is_functional(self.value_network) + else: + raise RuntimeError("Cannot determine if value network is functional.") + + +class TD0Estimate(ValueFunctionBase): + """Myopic Temporal Difference (TD(0)) estimate of advantage function. Args: gamma (scalar): exponential mean discount. 
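The ``ValueFunctions`` enum and the ``default_value_kwargs`` helper added to ``torchrl/objectives/utils.py`` above are what ``make_value_function`` uses to seed hyperparameters before applying user overrides. A quick standalone illustration, importing from the module path used in this patch:

    # Standalone illustration of the enum and default hyperparameter helper.
    from torchrl.objectives.utils import default_value_kwargs, ValueFunctions

    hp = dict(default_value_kwargs(ValueFunctions.TDLambda))
    assert hp == {"gamma": 0.99, "lmbda": 0.95}
    hp.update({"gamma": 0.9})  # user overrides win, as in make_value_function
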
@@ -135,12 +149,169 @@ def __init__( ) self.out_keys = [self.advantage_key, self.value_target_key] - @property - def is_functional(self): - return ( - "_is_stateless" in self.value_network.__dict__ - and self.value_network.__dict__["_is_stateless"] + @_self_set_grad_enabled + @dispatch + def forward( + self, + tensordict: TensorDictBase, + params: Optional[TensorDictBase] = None, + target_params: Optional[TensorDictBase] = None, + ) -> TensorDictBase: + """Computes the TDEstimate given the data in tensordict. + + If a functional module is provided, a nested TensorDict containing the parameters + (and if relevant the target parameters) can be passed to the module. + + Args: + tensordict (TensorDictBase): A TensorDict containing the data + (an observation key, "action", ("next", "reward"), ("next", "done") and "next" tensordict state + as returned by the environment) necessary to compute the value estimates and the TDEstimate. + The data passed to this module should be structured as :obj:`[*B, T, F]` where :obj:`B` are + the batch size, :obj:`T` the time dimension and :obj:`F` the feature dimension(s). + params (TensorDictBase, optional): A nested TensorDict containing the params + to be passed to the functional value network module. + target_params (TensorDictBase, optional): A nested TensorDict containing the + target params to be passed to the functional value network module. + + Returns: + An updated TensorDict with an advantage and a value_error keys as defined in the constructor. + + Examples: + >>> from tensordict import TensorDict + >>> value_net = TensorDictModule( + ... nn.Linear(3, 1), in_keys=["obs"], out_keys=["state_value"] + ... ) + >>> module = TDEstimate( + ... gamma=0.98, + ... value_network=value_net, + ... differentiable=False, + ... ) + >>> obs, next_obs = torch.randn(2, 1, 10, 3) + >>> reward = torch.randn(1, 10, 1) + >>> done = torch.zeros(1, 10, 1, dtype=torch.bool) + >>> tensordict = TensorDict({"obs": obs, "next": {"obs": next_obs, "done": done, "reward": reward}}, [1, 10]) + >>> _ = module(tensordict) + >>> assert "advantage" in tensordict.keys() + + The module supports non-tensordict (i.e. unpacked tensordict) inputs too: + + Examples: + >>> value_net = TensorDictModule( + ... nn.Linear(3, 1), in_keys=["obs"], out_keys=["state_value"] + ... ) + >>> module = TDEstimate( + ... gamma=0.98, + ... value_network=value_net, + ... differentiable=False, + ... ) + >>> obs, next_obs = torch.randn(2, 1, 10, 3) + >>> reward = torch.randn(1, 10, 1) + >>> done = torch.zeros(1, 10, 1, dtype=torch.bool) + >>> advantage, value_target = module(obs=obs, reward=reward, done=done, next_obs=next_obs) + + """ + if tensordict.batch_dims < 1: + raise RuntimeError( + "Expected input tensordict to have at least one dimensions, got" + f"tensordict.batch_size = {tensordict.batch_size}" + ) + + kwargs = {} + if self.is_functional and params is None: + raise RuntimeError( + "Expected params to be passed to advantage module but got none." 
+ ) + if params is not None: + kwargs["params"] = params.detach() + with hold_out_net(self.value_network): + self.value_network(tensordict, **kwargs) + value = tensordict.get(self.value_key) + + if params is not None and target_params is None: + target_params = params.detach() + value_target = self.value_estimate(tensordict, target_params=target_params) + tensordict.set("advantage", value_target - value) + tensordict.set("value_target", value_target) + return tensordict + + def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): + kwargs = {} + gamma = self.gamma + # we may still need to pass gradient, but we don't want to assign grads to + # value net params + reward = tensordict.get(("next", "reward")) + if self.average_rewards: + reward = reward - reward.mean() + reward = reward / reward.std().clamp_min(1e-4) + tensordict.set( + ("next", "reward"), reward + ) # we must update the rewards if they are used later in the code + step_td = step_mdp(tensordict) + if target_params is not None: + # we assume that target parameters are not differentiable + kwargs["params"] = target_params + with hold_out_net(self.value_network): + self.value_network(step_td, **kwargs) + next_value = step_td.get(self.value_key) + + done = tensordict.get(("next", "done")) + value_target = reward + gamma * (1 - done.to(reward.dtype)) * next_value + return value_target + +class TD1Estimate(ValueFunctionBase): + """Bootstrapped Temporal Difference (TD(1)) estimate of advantage function. + + Args: + gamma (scalar): exponential mean discount. + value_network (TensorDictModule): value operator used to retrieve the value estimates. + average_rewards (bool, optional): if True, rewards will be standardized + before the TD is computed. + differentiable (bool, optional): if True, gradients are propagated throught + the computation of the value function. Default is :obj:`False`. + advantage_key (str or tuple of str, optional): the key of the advantage entry. + Defaults to "advantage". + value_target_key (str or tuple of str, optional): the key of the advantage entry. + Defaults to "value_target". + value_key (str or tuple of str, optional): the value key to read from the input tensordict. + Defaults to "state_value". + + """ + + def __init__( + self, + gamma: Union[float, torch.Tensor], + value_network: TensorDictModule, + average_rewards: bool = False, + differentiable: bool = False, + advantage_key: Union[str, Tuple] = "advantage", + value_target_key: Union[str, Tuple] = "value_target", + value_key: Union[str, Tuple] = "state_value", + ): + super().__init__() + try: + device = next(value_network.parameters()).device + except StopIteration: + device = torch.device("cpu") + self.register_buffer("gamma", torch.tensor(gamma, device=device)) + self.value_network = value_network + + self.average_rewards = average_rewards + self.differentiable = differentiable + self.value_key = value_key + if value_key not in value_network.out_keys: + raise KeyError( + f"value key '{value_key}' not found in value network out_keys." 
+ ) + + self.advantage_key = advantage_key + self.value_target_key = value_target_key + + self.in_keys = ( + value_network.in_keys + + [("next", "reward"), ("next", "done")] + + [("next", in_key) for in_key in value_network.in_keys] ) + self.out_keys = [self.advantage_key, self.value_target_key] @_self_set_grad_enabled @dispatch @@ -252,7 +423,7 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona return value_target class TDLambdaEstimate(ValueFunctionBase): - """TD-Lambda estimate of advantage function. + """TD(:math:`\lambda`) estimate of advantage function. Args: gamma (scalar): exponential mean discount. @@ -313,13 +484,6 @@ def __init__( ) self.out_keys = [self.advantage_key, self.value_target_key] - @property - def is_functional(self): - return ( - "_is_stateless" in self.value_network.__dict__ - and self.value_network.__dict__["_is_stateless"] - ) - @_self_set_grad_enabled @dispatch def forward( @@ -433,14 +597,14 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona done = tensordict.get(("next", "done")) if self.vectorized: - adv = vec_td_lambda_advantage_estimate( - gamma, lmbda, value, next_value, reward, done + val = vec_td_lambda_advantage_estimate( + gamma, lmbda, torch.zeros_like(next_value), next_value, reward, done ) else: - adv = td_lambda_advantage_estimate( - gamma, lmbda, value, next_value, reward, done + val = td_lambda_advantage_estimate( + gamma, lmbda, torch.zeros_like(next_value), next_value, reward, done ) - + return val class GAE(ValueFunctionBase): """A class wrapper around the generalized advantage estimate functional. @@ -510,13 +674,6 @@ def __init__( ) self.out_keys = [self.advantage_key, self.value_target_key] - @property - def is_functional(self): - return ( - "_is_stateless" in self.value_network.__dict__ - and self.value_network.__dict__["_is_stateless"] - ) - @_self_set_grad_enabled @dispatch def forward( From 1e34ef9d320aeac147e178f9f3273c519decbeb4 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 27 Mar 2023 11:46:05 +0100 Subject: [PATCH 17/89] SAC --- docs/source/reference/objectives.rst | 7 +- test/test_cost.py | 45 ++++--- torchrl/data/postprocs/postprocs.py | 2 +- torchrl/objectives/__init__.py | 2 + torchrl/objectives/common.py | 25 +++- torchrl/objectives/dqn.py | 111 +++++++-------- torchrl/objectives/sac.py | 179 +++++++++++++++---------- torchrl/objectives/utils.py | 14 +- torchrl/objectives/value/__init__.py | 8 +- torchrl/objectives/value/advantages.py | 162 +++++++++++++++++----- 10 files changed, 368 insertions(+), 187 deletions(-) diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index 2a7a2f92be7..ff178fba548 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -109,9 +109,10 @@ Returns :template: rl_template_noinherit.rst ValueFunctionBase - GAE + TD0Estimate + TD1Estimate TDLambdaEstimate - TDEstimate + GAE functional.generalized_advantage_estimate functional.vec_generalized_advantage_estimate functional.vec_td_lambda_return_estimate @@ -135,3 +136,5 @@ Utils next_state_value SoftUpdate HardUpdate + ValueFunctions + default_value_kwargs diff --git a/test/test_cost.py b/test/test_cost.py index 2359c34f6df..409bfa0e540 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -89,7 +89,7 @@ from torchrl.objectives.redq import REDQLoss from torchrl.objectives.reinforce import ReinforceLoss from torchrl.objectives.utils import HardUpdate, hold_out_net, SoftUpdate -from 
torchrl.objectives.value.advantages import GAE, TDEstimate, TDLambdaEstimate +from torchrl.objectives.value.advantages import GAE, TD1Estimate, TDLambdaEstimate from torchrl.objectives.value.functional import ( generalized_advantage_estimate, td_advantage_estimate, @@ -109,7 +109,9 @@ def __enter__(self): pass def __exit__(self, exc_type, exc_val, exc_tb): - assert (self.td.select(*self.td_clone.keys()) == self.td_clone).all(), "Some keys have been modified in the tensordict!" + assert ( + self.td.select(*self.td_clone.keys()) == self.td_clone + ).all(), "Some keys have been modified in the tensordict!" def get_devices(): @@ -340,9 +342,7 @@ def test_dqn_batcher(self, n, delay_value, device, action_spec_type, gamma=0.9): td = self._create_seq_mock_data_dqn( action_spec_type=action_spec_type, device=device ) - loss_fn = DQNLoss( - actor, loss_function="l2", delay_value=delay_value - ) + loss_fn = DQNLoss(actor, loss_function="l2", delay_value=delay_value) ms = MultiStep(gamma=gamma, n_steps=n).to(device) ms_td = ms(td.clone()) @@ -1072,7 +1072,6 @@ def test_sac( qvalue_network=qvalue, value_network=value, num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", **kwargs, ) @@ -1197,7 +1196,6 @@ def test_sac_batcher( num_qvalue, device, version, - gamma=0.9, ): if (delay_actor or delay_qvalue) and not delay_value: pytest.skip("incompatible config") @@ -1224,12 +1222,11 @@ def test_sac_batcher( qvalue_network=qvalue, value_network=value, num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", **kwargs, ) - ms = MultiStep(gamma=gamma, n_steps=n).to(device) + ms = MultiStep(gamma=0.9, n_steps=n).to(device) td_clone = td.clone() ms_td = ms(td_clone) @@ -2256,7 +2253,7 @@ def test_ppo(self, loss_class, device, gradient_mode, advantage): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TDEstimate( + advantage = TD1Estimate( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": @@ -2319,7 +2316,7 @@ def test_ppo_shared(self, loss_class, device, advantage): value_network=value, ) elif advantage == "td": - advantage = TDEstimate( + advantage = TD1Estimate( gamma=0.9, value_network=value, ) @@ -2395,7 +2392,7 @@ def test_ppo_diff(self, loss_class, device, gradient_mode, advantage): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TDEstimate( + advantage = TD1Estimate( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": @@ -2531,7 +2528,7 @@ def test_a2c(self, device, gradient_mode, advantage): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TDEstimate( + advantage = TD1Estimate( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": @@ -2607,7 +2604,7 @@ def test_a2c_diff(self, device, gradient_mode, advantage): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TDEstimate( + advantage = TD1Estimate( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": @@ -2682,7 +2679,7 @@ def test_reinforce_value_net(self, advantage, gradient_mode, delay_value): differentiable=gradient_mode, ) elif advantage == "td": - advantage = TDEstimate( + advantage = TD1Estimate( gamma=gamma, value_network=get_functional(value_net), differentiable=gradient_mode, @@ -4208,7 +4205,11 @@ def __init__(self, 
actor_network, qvalue_network): class TestAdv: @pytest.mark.parametrize( "adv,kwargs", - [[GAE, {"lmbda": 0.95}], [TDEstimate, {}], [TDLambdaEstimate, {"lmbda": 0.95}]], + [ + [GAE, {"lmbda": 0.95}], + [TD1Estimate, {}], + [TDLambdaEstimate, {"lmbda": 0.95}], + ], ) def test_dispatch( self, @@ -4236,7 +4237,11 @@ def test_dispatch( @pytest.mark.parametrize( "adv,kwargs", - [[GAE, {"lmbda": 0.95}], [TDEstimate, {}], [TDLambdaEstimate, {"lmbda": 0.95}]], + [ + [GAE, {"lmbda": 0.95}], + [TD1Estimate, {}], + [TDLambdaEstimate, {"lmbda": 0.95}], + ], ) def test_diff_reward( self, @@ -4273,7 +4278,11 @@ def test_diff_reward( @pytest.mark.parametrize( "adv,kwargs", - [[GAE, {"lmbda": 0.95}], [TDEstimate, {}], [TDLambdaEstimate, {"lmbda": 0.95}]], + [ + [GAE, {"lmbda": 0.95}], + [TD1Estimate, {}], + [TDLambdaEstimate, {"lmbda": 0.95}], + ], ) def test_non_differentiable(self, adv, kwargs): value_net = TensorDictModule( diff --git a/torchrl/data/postprocs/postprocs.py b/torchrl/data/postprocs/postprocs.py index 26cdc470824..2ec0bfb4d97 100644 --- a/torchrl/data/postprocs/postprocs.py +++ b/torchrl/data/postprocs/postprocs.py @@ -201,7 +201,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict.set("steps_to_next_obs", time_to_obs + 1) tensordict.rename_key_(("next", "reward"), ("next", "original_reward")) - tensordict["next"].update(tensordict_gather) + tensordict.get("next").update(tensordict_gather) tensordict.set(("next", "reward"), summed_rewards) tensordict.set("gamma", self.gamma ** (time_to_obs + 1)) nonterminal = time_to_obs != 0 diff --git a/torchrl/objectives/__init__.py b/torchrl/objectives/__init__.py index 73be0fe5e99..e74ccbac808 100644 --- a/torchrl/objectives/__init__.py +++ b/torchrl/objectives/__init__.py @@ -15,12 +15,14 @@ from .sac import DiscreteSACLoss, SACLoss from .td3 import TD3Loss from .utils import ( + default_value_kwargs, distance_loss, HardUpdate, hold_out_net, hold_out_params, next_state_value, SoftUpdate, + ValueFunctions, ) # from .value import bellman_max, c_val, dv_val, vtrace, GAE, TDLambdaEstimate, TDEstimate diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index c53c9214de1..a105b23da98 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -52,6 +52,7 @@ class LossModule(nn.Module): def __init__(self): super().__init__() self._param_maps = {} + self._value_function = None # self.register_forward_pre_hook(_parameters_to_tensordict) def forward(self, tensordict: TensorDictBase) -> TensorDictBase: @@ -355,7 +356,19 @@ def half(self) -> LossModule: def cpu(self) -> LossModule: return self.to(torch.device("cpu")) - def _default_value_function(self) -> ValueFunctionBase: + @property + def value_function(self) -> ValueFunctionBase: + out = self._value_function + if out is None: + self._default_value_function() + return self._value_function + return out + + @value_function.setter + def value_function(self, value): + self._value_function = value + + def _default_value_function(self): """A value-function constructor when none is provided. No kwarg should be present as default parameters should be retrieved @@ -387,18 +400,20 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): """ if value_type == ValueFunctions.TD1: - raise NotImplementedError(f"Value type {value_type} it not implemented for loss {type(self)}.") + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." 
+ ) elif value_type == ValueFunctions.TD0: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." - ) + ) elif value_type == ValueFunctions.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." - ) + ) elif value_type == ValueFunctions.TDLambda: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." - ) + ) else: raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 41a6887a858..0bb6a31a319 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Optional, Union +from typing import Union import torch from tensordict import TensorDict, TensorDictBase @@ -14,8 +14,9 @@ from torchrl.modules.tensordict_module.common import ensure_tensordict_compatible from .common import LossModule -from .utils import distance_loss, ValueFunctions, default_value_kwargs -from .value import TDLambdaEstimate, ValueFunctionBase +from .utils import default_value_kwargs, distance_loss, ValueFunctions +from .value import GAE, TDLambdaEstimate +from .value.advantages import TD0Estimate, TD1Estimate class DQNLoss(LossModule): @@ -23,8 +24,6 @@ class DQNLoss(LossModule): Args: value_network (QValueActor or nn.Module): a Q value operator. - value_function (ValueFunctionBase, optional): the value function module - to be used. Defaults to :class:`torchrl.objectives.values.TDLambdaEstimate`. loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". delay_value (bool, optional): whether to duplicate the value network into a new target value network to create a double DQN. Default is :obj:`False`. @@ -34,7 +33,6 @@ class DQNLoss(LossModule): def __init__( self, value_network: Union[QValueActor, nn.Module], - value_function: Optional[ValueFunctionBase] = None, loss_function: str = "l2", priority_key: str = "td_error", delay_value: bool = False, @@ -42,13 +40,6 @@ def __init__( super().__init__() self.delay_value = delay_value - if ( - value_function is not None - and value_function.value_network is not value_network - ): - raise RuntimeError( - "value_function.value_network and value_network must match." - ) value_network = ensure_tensordict_compatible( module=value_network, wrapper_type=QValueActor ) @@ -59,37 +50,41 @@ def __init__( create_target_params=self.delay_value, ) - if value_function is None: - value_function = self._default_value_function() - else: - value_function.value_key = "chosen_action_value" - self.value_function = value_function - self.value_network_in_keys = value_network.in_keys self.loss_function = loss_function self.priority_key = priority_key self.action_space = self.value_network.action_space - def make_value_function( - self, - value_type: ValueFunctions, - **hyperparams - ): + def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) - if value_type == ValueFunctions.TD1: - raise NotImplementedError(f"Value type {value_type} it not implemented for loss {type(self)}.") - elif value_type == ValueFunctions.TD0: - raise NotImplementedError( - f"Value type {value_type} it not implemented for loss {type(self)}." 
- ) - elif value_type == ValueFunctions.GAE: - raise NotImplementedError( - f"Value type {value_type} it not implemented for loss {type(self)}." - ) - elif value_type == ValueFunctions.TDLambda: - return TDLambdaEstimate( + if value_type is ValueFunctions.TD1: + self._value_function = TD1Estimate( + **hp, + value_network=self.value_network, + advantage_key="advantage", + value_target_key="value_target", + value_key="chosen_action_value", + ) + elif value_type is ValueFunctions.TD0: + self._value_function = TD0Estimate( + **hp, + value_network=self.value_network, + advantage_key="advantage", + value_target_key="value_target", + value_key="chosen_action_value", + ) + elif value_type is ValueFunctions.GAE: + self._value_function = GAE( + **hp, + value_network=self.value_network, + advantage_key="advantage", + value_target_key="value_target", + value_key="chosen_action_value", + ) + elif value_type is ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( **hp, value_network=self.value_network, advantage_key="advantage", @@ -99,19 +94,8 @@ def make_value_function( else: raise NotImplementedError(f"Unknown value type {value_type}") - def _default_value_function(self): - return TDLambdaEstimate( - gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, - lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, - value_network=self.value_network, - average_rewards=True, - differentiable=False, - vectorized=True, - advantage_key="advantage", - value_target_key="value_target", - value_key="chosen_action_value", - ) + self.make_value_function(ValueFunctions.TDLambda) def forward(self, input_tensordict: TensorDictBase) -> TensorDict: """Computes the DQN loss given a tensordict sampled from the replay buffer. @@ -160,11 +144,9 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: action = action.to(torch.float) pred_val_index = (pred_val * action).sum(-1) - target_value = self.value_function( - tensordict.clone(False), - self.value_network_params, - self.target_value_network_params, - ).get(self.value_function.value_target_key).squeeze(-1) + target_value = self.value_function.value_estimate( + tensordict.clone(False), target_params=self.target_value_network_params + ).squeeze(-1) priority_tensor = (pred_val_index - target_value).pow(2) priority_tensor = priority_tensor.detach().unsqueeze(-1) @@ -317,6 +299,7 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: reward = reward.to("cpu") support = support.to("cpu") pns_a = pns_a.to("cpu") + Tz = reward + (1 - done.to(reward.dtype)) * discount * support if Tz.shape != torch.Size([batch_size, atoms]): raise RuntimeError( @@ -363,3 +346,25 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: ) loss_td = TensorDict({"loss": loss.mean()}, []) return loss_td + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + if value_type is ValueFunctions.TD1: + raise NotImplementedError( + f"value type {value_type} is not implemented for {self.__class__.__name__}." + ) + elif value_type is ValueFunctions.TD0: + # see forward call + pass + elif value_type is ValueFunctions.GAE: + raise NotImplementedError( + f"value type {value_type} is not implemented for {self.__class__.__name__}." + ) + elif value_type is ValueFunctions.TDLambda: + raise NotImplementedError( + f"value type {value_type} is not implemented for {self.__class__.__name__}." 
+ ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") + + def _default_value_function(self): + self.make_value_function(ValueFunctions.TD0) diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 424e2f28bfb..4045dcfc119 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -9,17 +9,22 @@ import numpy as np import torch -from tensordict.nn import make_functional, TensorDictModule +from tensordict.nn import make_functional, TensorDictModule, TensorDictSequential from tensordict.tensordict import TensorDict, TensorDictBase from torch import Tensor from torchrl.modules import ProbabilisticActor from torchrl.modules.tensordict_module.actors import ActorCriticWrapper -from torchrl.objectives.utils import distance_loss, next_state_value -from .value import ValueFunctionBase, TDLambdaEstimate +from torchrl.objectives.utils import ( + default_value_kwargs, + distance_loss, + next_state_value, + ValueFunctions, +) from ..envs.utils import set_exploration_mode, step_mdp from .common import LossModule +from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -31,6 +36,53 @@ FUNCTORCH_ERROR = err +class _SACValueNet(TensorDictSequential): + r"""Value network for SAC v2. + + SAC v2 is based on a value estimate of the form: + + .. math:: + + V = Q(s,a) - \alpha * \log p(a | s) + + This class computes this value given the actor and qvalue network + + """ + + def __init__(self, actor_network, qvalue_network): + super().__init__(actor_network, qvalue_network) + # we highjack the forward so the out_keys must be re-written + self.out_keys = ["state_value"] + + def forward(self, tensordict, _alpha, actor_params, qval_params): + """Computes the value as `val = qval - a * log_prob(a)`.""" + actor_network, qvalue_network = self + + obs_keys = actor_network.in_keys + data = tensordict.select(*obs_keys) + # get actions and log-probs + with torch.no_grad(): + with set_exploration_mode("random"): + dist = actor_network.get_dist(data, params=actor_params) + data.set("action", dist.rsample()) + log_prob = dist.log_prob(data.get("action")) + data.set("sample_log_prob", log_prob) + sample_log_prob = data.get("sample_log_prob") + + # get q-values + data = vmap(qvalue_network, (None, 0))(data, qval_params) + state_action_value = data.get("state_action_value") + if ( + state_action_value.shape[-len(sample_log_prob.shape) :] + != sample_log_prob.shape + ): + sample_log_prob = sample_log_prob.unsqueeze(-1) + state_value = state_action_value - _alpha * sample_log_prob + state_value = state_value.min(0)[0] + tensordict.set("state_value", state_value) + return tensordict + + class SACLoss(LossModule): """TorchRL implementation of the SAC loss. @@ -47,8 +99,6 @@ class SACLoss(LossModule): .. note:: If not provided, the second version of SAC is assumed, where only the Q-Value network is needed. - value_function (ValueFunctionBase, optional): the value function module - to be used. Defaults to :class:`torchrl.objectives.values.TDLambdaEstimate`. priority_key (str, optional): tensordict key where to write the priority (for prioritized replay buffer usage). Defaults to ``"td_error"``. 
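Note on the ``_SACValueNet`` module introduced above: in the second version of SAC the entropy bonus is folded into the state value before the TD target is built. The following is a minimal, standalone sketch of that computation on plain tensors; the tensor names and the final TD(0) line are illustrative assumptions, not part of the TorchRL API.

import torch


def sac_v2_state_value(q_values, log_prob, alpha):
    """V(s) = min_k Q_k(s, a) - alpha * log pi(a|s), for a ~ pi(.|s).

    q_values: [num_qvalue_nets, *batch, 1] ensemble of Q-estimates for the sampled action.
    log_prob: [*batch] log-probability of the sampled action.
    """
    if q_values.shape[-len(log_prob.shape) :] != log_prob.shape:
        # align trailing dims, mirroring the check in _SACValueNet.forward
        log_prob = log_prob.unsqueeze(-1)
    # entropy-regularized value, computed per ensemble member
    state_value = q_values - alpha * log_prob
    # pessimistic aggregation over the Q-value ensemble
    return state_value.min(dim=0)[0]


# A TD(0) target then bootstraps from this value (sketch, scalar gamma assumed):
# value_target = reward + gamma * (1 - done.float()) * sac_v2_state_value(next_q, next_log_prob, alpha)

Taking the minimum over the ensemble is the usual clipped double-Q device to curb value overestimation.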
@@ -83,7 +133,6 @@ def __init__( actor_network: ProbabilisticActor, qvalue_network: TensorDictModule, value_network: Optional[TensorDictModule] = None, - value_function: Optional[ValueFunctionBase] = None, num_qvalue_nets: int = 2, priority_key: str = "td_error", loss_function: str = "smooth_l1", @@ -178,30 +227,52 @@ def __init__( ) make_functional(self.actor_critic) - if value_function is None: - value_function = self._default_value_function() - elif self._version == 1: - # in v1, the next value requires an action to be sampled - value_function.value_network = self.actor_critic + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + if self._version == 1: + value_net = self.actor_critic + elif self._version == 2: + value_net = _SACValueNet(self.actor_network, self.qvalue_network) else: - # TODO - pass - - self.value_function = value_function - + # unreachable + raise NotImplementedError + + value_key = "state_value" + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + if value_type is ValueFunctions.TD1: + self._value_function = TD1Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TD0: + self._value_function = TD0Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.GAE: + self._value_function = GAE( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") def _default_value_function(self): - return TDLambdaEstimate( - gamma=DEFAULT_VALUE_FUN_PARAMS.gamma, - lmbda=DEFAULT_VALUE_FUN_PARAMS.lmbda, - value_network=self.actor_critic if self._version == 1 else self.qvalue_network, - average_rewards=True, - differentiable=False, - vectorized=True, - advantage_key="advantage", - value_target_key="value_target", - value_key="state_action_value" if self._version == 2 else "state_value", - ) + # TD0 by default, as in paper + self.make_value_function(ValueFunctions.TD0) @property def device(self) -> torch.device: @@ -248,7 +319,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: } if self._version == 1: out["loss_value"] = loss_value.mean() - return TensorDict(out,[]) + return TensorDict(out, []) def _loss_actor(self, tensordict: TensorDictBase) -> Tensor: # KL lossa @@ -291,9 +362,8 @@ def _loss_qvalue_v1(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: ) with set_exploration_mode("mode"): target_value = self.value_function.value_estimate( - tensordict, - target_params=target_params - ) + tensordict, target_params=target_params + ).squeeze(-1) # value loss qvalue_network = self.qvalue_network @@ -324,45 +394,14 @@ def _loss_qvalue_v1(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: return loss_value, priority_value def _loss_qvalue_v2(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: - obs_keys = self.actor_network.in_keys - tensordict = tensordict.select("next", *obs_keys, "action") - - with torch.no_grad(): - next_td = step_mdp(tensordict).select( - *self.actor_network.in_keys - ) # next_observation -> - # observation - # select pseudo-action - with set_exploration_mode("random"): - dist = self.actor_network.get_dist( - next_td, - 
params=self.target_actor_network_params, - ) - next_td.set("action", dist.rsample()) - next_td.set("sample_log_prob", dist.log_prob(next_td["action"])) - sample_log_prob = next_td.get("sample_log_prob") - # get q-values - next_td = vmap(self.qvalue_network, (None, 0))( - next_td, - self.target_qvalue_network_params, - ) - state_action_value = next_td.get("state_action_value") - if ( - state_action_value.shape[-len(sample_log_prob.shape) :] - != sample_log_prob.shape - ): - sample_log_prob = sample_log_prob.unsqueeze(-1) - state_value = ( - state_action_value - self._alpha * sample_log_prob - ) - state_value = state_value.min(0)[0] - - tensordict.set("next.state_value", state_value) - target_value = next_state_value( + # we pass the alpha value to the tensordict. Since it's a scalar, we must erase the batch-size first. + target_value = self.value_function.value_estimate( tensordict, - gamma=self.gamma, - pred_next_val=state_value, - ) + _alpha=self._alpha, + actor_params=self.target_actor_network_params, + qval_params=self.target_qvalue_network_params, + ).squeeze(-1) + tensordict_expand = vmap(self.qvalue_network, (None, 0))( tensordict.select(*self.qvalue_network.in_keys), self.qvalue_network_params, @@ -390,8 +429,6 @@ def _loss_value(self, tensordict: TensorDictBase) -> Tensor: params=self.target_actor_network_params, ) # resample an action action = action_dist.rsample() - # if not self.actor_network.spec.is_in(action): - # action.data.copy_(self.actor_network.spec.project(action.data)) td_copy.set("action", action, inplace=False) diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index fc164430cb5..6f0a4a65c73 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -17,11 +17,23 @@ class ValueFunctions(Enum): + """Value function enumerator for custom-built estimators. + + Allows for a flexible usage of various value functions when the loss module + allows it. + + Examples: + >>> dqn_loss = DQNLoss(actor) + >>> dqn_loss.make_value_function(ValueFunctions.TD0, gamma=0.9) + + """ + TD0 = 1 TD1 = 2 TDLambda = 3 GAE = 4 + def default_value_kwargs(value_type: ValueFunctions): """Default value function keyword argument generator. @@ -46,8 +58,6 @@ def default_value_kwargs(value_type: ValueFunctions): raise NotImplementedError(f"Unknown value type {value_type}.") - - class _context_manager: def __init__(self, value=True): self.value = value diff --git a/torchrl/objectives/value/__init__.py b/torchrl/objectives/value/__init__.py index 6152732f411..ef224940ddf 100644 --- a/torchrl/objectives/value/__init__.py +++ b/torchrl/objectives/value/__init__.py @@ -3,4 +3,10 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from .advantages import GAE, TDEstimate, TDLambdaEstimate, ValueFunctionBase +from .advantages import ( + GAE, + TD0Estimate, + TD1Estimate, + TDLambdaEstimate, + ValueFunctionBase, +) diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 7465206ce78..17a9c35c90d 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -4,10 +4,10 @@ # LICENSE file in the root directory of this source tree. 
import abc from functools import wraps -from typing import List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union import torch -from tensordict.nn import dispatch, TensorDictModule, is_functional +from tensordict.nn import dispatch, is_functional, TensorDictModule from tensordict.tensordict import TensorDictBase from torch import nn, Tensor @@ -32,9 +32,18 @@ def new_fun(self, *args, **kwargs): class ValueFunctionBase(nn.Module): - """An abstract parent class for value function modules.""" + """An abstract parent class for value function modules. - value_network: TensorDictModule + Its :meth:`ValueFunctionBase.forward` method will compute the value (given + by the value network) and the value estimate (given by the value estimator) + as well as the advantage and write these values in the output tensordict. + + If only the value estimate is needed, the :meth:`ValueFunctionBase.value_estimate` + should be used instead. + + """ + + value_network: Union[TensorDictModule, Callable] value_key: Union[Tuple[str], str] @abc.abstractmethod @@ -65,23 +74,20 @@ def forward( """ raise NotImplementedError - def value_estimate(self, tensordict, requires_grad=True, target_params: Optional[TensorDictBase] = None): + def value_estimate( + self, + tensordict, + target_params: Optional[TensorDictBase] = None, + **kwargs, + ): """Gets a value estimate, usually used as a target value for the value network. Args: tensordict (TensorDictBase): the tensordict containing the data to read. - requires_grad (bool, optional): whether the estimate should be part - of a computational graph. - .. note:: - To avoid carrying gradient with respect to the parameters, - one can also use ``val_fun.value_estimate(tensordict, target_params=params.detach())`` - which allows gradients to pass through the value function - without including the parameters in the computational graph. - - Defaults to ``True``. target_params (TensorDictBase, optional): A nested TensorDict containing the target params to be passed to the functional value network module. + **kwargs: the keyword arguments to be passed to the value network. """ raise NotImplementedError @@ -134,7 +140,10 @@ def __init__( self.average_rewards = average_rewards self.differentiable = differentiable self.value_key = value_key - if value_key not in value_network.out_keys: + if ( + hasattr(value_network, "out_keys") + and value_key not in value_network.out_keys + ): raise KeyError( f"value key '{value_key}' not found in value network out_keys." 
) @@ -234,12 +243,18 @@ def forward( tensordict.set("value_target", value_target) return tensordict - def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): - kwargs = {} + def value_estimate( + self, + tensordict, + target_params: Optional[TensorDictBase] = None, + **kwargs, + ): gamma = self.gamma - # we may still need to pass gradient, but we don't want to assign grads to - # value net params reward = tensordict.get(("next", "reward")) + steps_to_next_obs = tensordict.get("steps_to_next_obs", None) + if steps_to_next_obs is not None: + gamma = gamma ** steps_to_next_obs.view_as(reward) + if self.average_rewards: reward = reward - reward.mean() reward = reward / reward.std().clamp_min(1e-4) @@ -258,6 +273,7 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona value_target = reward + gamma * (1 - done.to(reward.dtype)) * next_value return value_target + class TD1Estimate(ValueFunctionBase): """Bootstrapped Temporal Difference (TD(1)) estimate of advantage function. @@ -298,7 +314,10 @@ def __init__( self.average_rewards = average_rewards self.differentiable = differentiable self.value_key = value_key - if value_key not in value_network.out_keys: + if ( + hasattr(value_network, "out_keys") + and value_key not in value_network.out_keys + ): raise KeyError( f"value key '{value_key}' not found in value network out_keys." ) @@ -398,12 +417,18 @@ def forward( tensordict.set("value_target", value_target) return tensordict - def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): - kwargs = {} + def value_estimate( + self, + tensordict, + target_params: Optional[TensorDictBase] = None, + **kwargs, + ): gamma = self.gamma - # we may still need to pass gradient, but we don't want to assign grads to - # value net params reward = tensordict.get(("next", "reward")) + steps_to_next_obs = tensordict.get("steps_to_next_obs", None) + if steps_to_next_obs is not None: + gamma = gamma ** steps_to_next_obs.view_as(reward) + if self.average_rewards: reward = reward - reward.mean() reward = reward / reward.std().clamp_min(1e-4) @@ -419,11 +444,14 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) - value_target = td_advantage_estimate(gamma, torch.zeros_like(next_value), next_value, reward, done) + value_target = td_advantage_estimate( + gamma, torch.zeros_like(next_value), next_value, reward, done + ) return value_target + class TDLambdaEstimate(ValueFunctionBase): - """TD(:math:`\lambda`) estimate of advantage function. + r"""TD(:math:`\lambda`) estimate of advantage function. Args: gamma (scalar): exponential mean discount. @@ -469,7 +497,10 @@ def __init__( self.average_rewards = average_rewards self.differentiable = differentiable self.value_key = value_key - if value_key not in value_network.out_keys: + if ( + hasattr(value_network, "out_keys") + and value_key not in value_network.out_keys + ): raise KeyError( f"value key '{value_key}' not found in value network out_keys." 
) @@ -566,15 +597,24 @@ def forward( target_params = params.detach() value_target = self.value_estimate(tensordict, target_params=target_params) - tensordict.set(self.advantage_key, value_target-value) + tensordict.set(self.advantage_key, value_target - value) tensordict.set(self.value_target_key, value_target) return tensordict - def value_estimate(self, tensordict, requires_grad=False, target_params: Optional[TensorDictBase] = None): + def value_estimate( + self, + tensordict, + target_params: Optional[TensorDictBase] = None, + **kwargs, + ): gamma = self.gamma - lmbda = self.lmbda reward = tensordict.get(("next", "reward")) + steps_to_next_obs = tensordict.get("steps_to_next_obs", None) + if steps_to_next_obs is not None: + gamma = gamma ** steps_to_next_obs.view_as(reward) + + lmbda = self.lmbda if self.average_rewards: reward = reward - reward.mean() reward = reward / reward.std().clamp_min(1e-4) @@ -582,9 +622,6 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona ("next", "reward"), reward ) # we must update the rewards if they are used later in the code - - kwargs = {} - step_td = step_mdp(tensordict) if target_params is not None: # we assume that target parameters are not differentiable @@ -606,6 +643,7 @@ def value_estimate(self, tensordict, requires_grad=False, target_params: Optiona ) return val + class GAE(ValueFunctionBase): """A class wrapper around the generalized advantage estimate functional. @@ -656,7 +694,10 @@ def __init__( self.register_buffer("lmbda", torch.tensor(lmbda, device=device)) self.value_network = value_network self.value_key = value_key - if value_key not in value_network.out_keys: + if ( + hasattr(value_network, "out_keys") + and value_key not in value_network.out_keys + ): raise KeyError( f"value key '{value_key}' not found in value network out_keys." ) @@ -745,6 +786,11 @@ def forward( ) reward = tensordict.get(("next", "reward")) gamma, lmbda = self.gamma, self.lmbda + reward = tensordict.get(("next", "reward")) + steps_to_next_obs = tensordict.get("steps_to_next_obs", None) + if steps_to_next_obs is not None: + gamma = gamma ** steps_to_next_obs.view_as(reward) + kwargs = {} if self.is_functional and params is None: raise RuntimeError( @@ -785,3 +831,51 @@ def forward( tensordict.set(self.value_target_key, value_target) return tensordict + + def value_estimate( + self, + tensordict, + params: Optional[TensorDictBase] = None, + target_params: Optional[TensorDictBase] = None, + **kwargs, + ): + if tensordict.batch_dims < 1: + raise RuntimeError( + "Expected input tensordict to have at least one dimensions, got" + f"tensordict.batch_size = {tensordict.batch_size}" + ) + reward = tensordict.get(("next", "reward")) + gamma, lmbda = self.gamma, self.lmbda + steps_to_next_obs = tensordict.get("steps_to_next_obs", None) + if steps_to_next_obs is not None: + gamma = gamma ** steps_to_next_obs.view_as(reward) + + if self.is_functional and params is None: + raise RuntimeError( + "Expected params to be passed to advantage module but got none." 
+ ) + if params is not None: + kwargs["params"] = params + with hold_out_net(self.value_network): + # we may still need to pass gradient, but we don't want to assign grads to + # value net params + self.value_network(tensordict, **kwargs) + + value = tensordict.get(self.value_key) + + step_td = step_mdp(tensordict) + if target_params is not None: + # we assume that target parameters are not differentiable + kwargs["params"] = target_params + elif "params" in kwargs: + kwargs["params"] = kwargs["params"].detach() + with hold_out_net(self.value_network): + # we may still need to pass gradient, but we don't want to assign grads to + # value net params + self.value_network(step_td, **kwargs) + next_value = step_td.get(self.value_key) + done = tensordict.get(("next", "done")) + _, value_target = vec_generalized_advantage_estimate( + gamma, lmbda, value, next_value, reward, done + ) + return value_target From 01f1ae70fcf3b18f1396f34e8844654553a25fea Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 27 Mar 2023 18:28:34 +0100 Subject: [PATCH 18/89] amend --- test/test_cost.py | 158 ++++++---- torchrl/collectors/collectors.py | 6 +- torchrl/data/datasets/d4rl.py | 2 +- torchrl/data/replay_buffers/replay_buffers.py | 2 +- torchrl/data/replay_buffers/samplers.py | 2 +- torchrl/data/tensor_specs.py | 6 +- torchrl/envs/common.py | 10 +- torchrl/envs/libs/dm_control.py | 4 +- torchrl/envs/libs/utils.py | 2 +- torchrl/envs/transforms/r3m.py | 2 +- torchrl/envs/transforms/transforms.py | 22 +- torchrl/envs/transforms/vip.py | 2 +- torchrl/envs/utils.py | 10 +- torchrl/envs/vec_env.py | 6 +- torchrl/modules/models/models.py | 3 +- torchrl/modules/tensordict_module/actors.py | 8 +- torchrl/modules/tensordict_module/common.py | 2 +- .../tensordict_module/probabilistic.py | 2 +- torchrl/objectives/a2c.py | 73 ++++- torchrl/objectives/common.py | 10 +- torchrl/objectives/ddpg.py | 53 +++- torchrl/objectives/deprecated.py | 88 ++++-- torchrl/objectives/dqn.py | 16 +- torchrl/objectives/dreamer.py | 94 ++++-- torchrl/objectives/iql.py | 88 +++--- torchrl/objectives/ppo.py | 269 ++++++++++++++---- torchrl/objectives/redq.py | 90 +++--- torchrl/objectives/reinforce.py | 85 +++++- torchrl/objectives/sac.py | 197 +++++++------ torchrl/objectives/td3.py | 69 +++-- torchrl/objectives/value/advantages.py | 175 +++++++----- torchrl/objectives/value/functional.py | 36 ++- torchrl/objectives/value/utils.py | 2 +- torchrl/record/recorder.py | 4 +- torchrl/trainers/helpers/collectors.py | 2 +- torchrl/trainers/helpers/envs.py | 4 +- torchrl/trainers/trainers.py | 12 +- 37 files changed, 1084 insertions(+), 532 deletions(-) diff --git a/test/test_cost.py b/test/test_cost.py index 409bfa0e540..e09364ca69d 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -4,7 +4,6 @@ # LICENSE file in the root directory of this source tree. 
import argparse -import re from copy import deepcopy from packaging import version as pack_version @@ -88,7 +87,12 @@ from torchrl.objectives.deprecated import DoubleREDQLoss_deprecated, REDQLoss_deprecated from torchrl.objectives.redq import REDQLoss from torchrl.objectives.reinforce import ReinforceLoss -from torchrl.objectives.utils import HardUpdate, hold_out_net, SoftUpdate +from torchrl.objectives.utils import ( + HardUpdate, + hold_out_net, + SoftUpdate, + ValueFunctions, +) from torchrl.objectives.value.advantages import GAE, TD1Estimate, TDLambdaEstimate from torchrl.objectives.value.functional import ( generalized_advantage_estimate, @@ -295,7 +299,8 @@ def _create_seq_mock_data_dqn( @pytest.mark.parametrize( "action_spec_type", ("nd_bounded", "one_hot", "categorical") ) - def test_dqn(self, delay_value, device, action_spec_type): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_dqn(self, delay_value, device, action_spec_type, td_est): torch.manual_seed(self.seed) actor = self._create_mock_actor( action_spec_type=action_spec_type, device=device @@ -304,6 +309,12 @@ def test_dqn(self, delay_value, device, action_spec_type): action_spec_type=action_spec_type, device=device ) loss_fn = DQNLoss(actor, loss_function="l2", delay_value=delay_value) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) with _check_td_steady(td): loss = loss_fn(td) assert loss_fn.priority_key in td.keys() @@ -388,8 +399,9 @@ def test_dqn_batcher(self, n, delay_value, device, action_spec_type, gamma=0.9): @pytest.mark.parametrize( "action_spec_type", ("mult_one_hot", "one_hot", "categorical") ) + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) def test_distributional_dqn( - self, atoms, delay_value, device, action_spec_type, gamma=0.9 + self, atoms, delay_value, device, action_spec_type, td_est, gamma=0.9 ): torch.manual_seed(self.seed) actor = self._create_mock_distributional_actor( @@ -401,6 +413,13 @@ def test_distributional_dqn( ).to(device) loss_fn = DistributionalDQNLoss(actor, gamma=gamma, delay_value=delay_value) + if td_est not in (None, ValueFunctions.TD0): + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + elif td_est is not None: + loss_fn.make_value_function(td_est) + with _check_td_steady(td): loss = loss_fn(td) assert loss_fn.priority_key in td.keys() @@ -529,7 +548,8 @@ def _create_seq_mock_data_ddpg( ) @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("delay_actor,delay_value", [(False, False), (True, True)]) - def test_ddpg(self, delay_actor, delay_value, device): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_ddpg(self, delay_actor, delay_value, device, td_est): torch.manual_seed(self.seed) actor = self._create_mock_actor(device=device) value = self._create_mock_value(device=device) @@ -537,11 +557,17 @@ def test_ddpg(self, delay_actor, delay_value, device): loss_fn = DDPGLoss( actor, value, - gamma=0.9, loss_function="l2", delay_actor=delay_actor, delay_value=delay_value, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) + with _check_td_steady(td): loss = loss_fn(td) @@ -632,7 +658,6 @@ def test_ddpg_batcher(self, n, delay_actor, delay_value, device, gamma=0.9): 
loss_fn = DDPGLoss( actor, value, - gamma=gamma, loss_function="l2", delay_actor=delay_actor, delay_value=delay_value, @@ -763,6 +788,7 @@ def _create_seq_mock_data_td3( ) @pytest.mark.parametrize("policy_noise", [0.1, 1.0]) @pytest.mark.parametrize("noise_clip", [0.1, 1.0]) + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) def test_td3( self, delay_actor, @@ -770,6 +796,7 @@ def test_td3( device, policy_noise, noise_clip, + td_est, ): torch.manual_seed(self.seed) actor = self._create_mock_actor(device=device) @@ -778,13 +805,18 @@ def test_td3( loss_fn = TD3Loss( actor, value, - gamma=0.9, loss_function="l2", policy_noise=policy_noise, noise_clip=noise_clip, delay_actor=delay_actor, delay_qvalue=delay_qvalue, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -849,7 +881,6 @@ def test_td3_batcher( loss_fn = TD3Loss( actor, value, - gamma=0.9, policy_noise=policy_noise, noise_clip=noise_clip, delay_qvalue=delay_qvalue, @@ -1043,8 +1074,16 @@ def _create_seq_mock_data_sac( @pytest.mark.parametrize("delay_qvalue", (True, False)) @pytest.mark.parametrize("num_qvalue", [1, 2, 4, 8]) @pytest.mark.parametrize("device", get_available_devices()) + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) def test_sac( - self, delay_value, delay_actor, delay_qvalue, num_qvalue, device, version + self, + delay_value, + delay_actor, + delay_qvalue, + num_qvalue, + device, + version, + td_est, ): if (delay_actor or delay_qvalue) and not delay_value: pytest.skip("incompatible config") @@ -1076,6 +1115,13 @@ def test_sac( **kwargs, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) + with _check_td_steady(td): loss = loss_fn(td) assert loss_fn.priority_key in td.keys() @@ -1445,6 +1491,7 @@ def _create_seq_mock_data_sac( @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("target_entropy_weight", [0.01, 0.5, 0.99]) @pytest.mark.parametrize("target_entropy", ["auto", 1.0, 0.1, 0.0]) + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) def test_discrete_sac( self, delay_qvalue, @@ -1452,6 +1499,7 @@ def test_discrete_sac( device, target_entropy_weight, target_entropy, + td_est, ): torch.manual_seed(self.seed) @@ -1469,12 +1517,17 @@ def test_discrete_sac( qvalue_network=qvalue, num_actions=actor.spec["action"].space.n, num_qvalue_nets=num_qvalue, - gamma=0.9, target_entropy_weight=target_entropy_weight, target_entropy=target_entropy, loss_function="l2", **kwargs, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -1571,7 +1624,6 @@ def test_discrete_sac_batcher( qvalue_network=qvalue, num_actions=actor.spec["action"].space.n, num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", target_entropy_weight=target_entropy_weight, target_entropy=target_entropy, @@ -1807,7 +1859,6 @@ def test_redq(self, delay_qvalue, num_qvalue, device): actor_network=actor, qvalue_network=qvalue, num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", delay_qvalue=delay_qvalue, ) @@ -1892,7 +1943,6 @@ def test_redq_shared(self, 
delay_qvalue, num_qvalue, device): actor_network=actor, qvalue_network=qvalue, num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", delay_qvalue=delay_qvalue, target_entropy=0.0, @@ -1997,7 +2047,6 @@ def test_redq_batched(self, delay_qvalue, num_qvalue, device): actor_network=deepcopy(actor), qvalue_network=deepcopy(qvalue), num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", delay_qvalue=delay_qvalue, ) @@ -2009,7 +2058,6 @@ def test_redq_batched(self, delay_qvalue, num_qvalue, device): actor_network=deepcopy(actor), qvalue_network=deepcopy(qvalue), num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", ) @@ -2041,7 +2089,6 @@ def test_redq_batcher(self, n, delay_qvalue, num_qvalue, device, gamma=0.9): actor_network=actor, qvalue_network=qvalue, num_qvalue_nets=num_qvalue, - gamma=0.9, loss_function="l2", delay_qvalue=delay_qvalue, ) @@ -2240,7 +2287,7 @@ def _create_seq_mock_data_ppo( @pytest.mark.parametrize("loss_class", (PPOLoss, ClipPPOLoss, KLPENPPOLoss)) @pytest.mark.parametrize("gradient_mode", (True, False)) - @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda")) + @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) def test_ppo(self, loss_class, device, gradient_mode, advantage): torch.manual_seed(self.seed) @@ -2260,15 +2307,14 @@ def test_ppo(self, loss_class, device, gradient_mode, advantage): advantage = TDLambdaEstimate( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) + elif advantage is None: + pass else: raise NotImplementedError - loss_fn = loss_class(actor, value, gamma=0.9, loss_critic_type="l2") - with pytest.raises( - KeyError, match=re.escape('key "advantage" not found in TensorDict with') - ): - _ = loss_fn(td) - advantage(td) + loss_fn = loss_class(actor, value, loss_critic_type="l2") + if advantage is not None: + advantage(td) loss = loss_fn(td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2302,7 +2348,7 @@ def test_ppo(self, loss_class, device, gradient_mode, advantage): actor.zero_grad() @pytest.mark.parametrize("loss_class", (PPOLoss, ClipPPOLoss, KLPENPPOLoss)) - @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda")) + @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) def test_ppo_shared(self, loss_class, device, advantage): torch.manual_seed(self.seed) @@ -2326,20 +2372,18 @@ def test_ppo_shared(self, loss_class, device, advantage): lmbda=0.9, value_network=value, ) + elif advantage is None: + pass else: raise NotImplementedError loss_fn = loss_class( actor, value, - gamma=0.9, loss_critic_type="l2", ) - with pytest.raises( - KeyError, match=re.escape('key "advantage" not found in TensorDict with') - ): - _ = loss_fn(td) - advantage(td) + if advantage is not None: + advantage(td) loss = loss_fn(td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2377,7 +2421,7 @@ def test_ppo_shared(self, loss_class, device, advantage): ) @pytest.mark.parametrize("loss_class", (PPOLoss, ClipPPOLoss, KLPENPPOLoss)) @pytest.mark.parametrize("gradient_mode", (True, False)) - @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda")) + @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) def test_ppo_diff(self, loss_class, device, 
gradient_mode, advantage): if pack_version.parse(torch.__version__) > pack_version.parse("1.14"): @@ -2399,6 +2443,8 @@ def test_ppo_diff(self, loss_class, device, gradient_mode, advantage): advantage = TDLambdaEstimate( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) + elif advantage is None: + pass else: raise NotImplementedError @@ -2409,11 +2455,8 @@ def test_ppo_diff(self, loss_class, device, gradient_mode, advantage): for p in params: p.data.zero_() # assert len(list(floss_fn.parameters())) == 0 - with pytest.raises( - KeyError, match=re.escape('key "advantage" not found in TensorDict with') - ): - _ = floss_fn(params, buffers, td) - advantage(td) + if advantage is not None: + advantage(td) loss = floss_fn(params, buffers, td) loss_critic = loss["loss_critic"] @@ -2515,7 +2558,7 @@ def _create_seq_mock_data_a2c( return td @pytest.mark.parametrize("gradient_mode", (True, False)) - @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda")) + @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) def test_a2c(self, device, gradient_mode, advantage): torch.manual_seed(self.seed) @@ -2535,10 +2578,12 @@ def test_a2c(self, device, gradient_mode, advantage): advantage = TDLambdaEstimate( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) + elif advantage is None: + pass else: raise NotImplementedError - loss_fn = A2CLoss(actor, value, gamma=0.9, loss_critic_type="l2") + loss_fn = A2CLoss(actor, value, loss_critic_type="l2") # Check error is raised when actions require grads td["action"].requires_grad = True @@ -2550,12 +2595,8 @@ def test_a2c(self, device, gradient_mode, advantage): td["action"].requires_grad = False td = td.exclude(loss_fn.value_target_key) - - with pytest.raises( - KeyError, match=re.escape('key "advantage" not found in TensorDict with') - ): - _ = loss_fn(td) - advantage(td) + if advantage is not None: + advantage(td) loss = loss_fn(td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2589,7 +2630,7 @@ def test_a2c(self, device, gradient_mode, advantage): not _has_functorch, reason=f"functorch not found, {FUNCTORCH_ERR}" ) @pytest.mark.parametrize("gradient_mode", (True, False)) - @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda")) + @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) def test_a2c_diff(self, device, gradient_mode, advantage): if pack_version.parse(torch.__version__) > pack_version.parse("1.14"): @@ -2611,18 +2652,17 @@ def test_a2c_diff(self, device, gradient_mode, advantage): advantage = TDLambdaEstimate( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) + elif advantage is None: + pass else: raise NotImplementedError - loss_fn = A2CLoss(actor, value, gamma=0.9, loss_critic_type="l2") + loss_fn = A2CLoss(actor, value, loss_critic_type="l2") floss_fn, params, buffers = make_functional_with_buffers(loss_fn) - with pytest.raises( - KeyError, match=re.escape('key "advantage" not found in TensorDict with') - ): - _ = floss_fn(params, buffers, td) - advantage(td) + if advantage is not None: + advantage(td) loss = floss_fn(params, buffers, td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2655,7 +2695,7 @@ def test_a2c_diff(self, device, gradient_mode, advantage): class TestReinforce: 
@pytest.mark.parametrize("delay_value", [True, False]) @pytest.mark.parametrize("gradient_mode", [True, False]) - @pytest.mark.parametrize("advantage", ["gae", "td", "td_lambda"]) + @pytest.mark.parametrize("advantage", ["gae", "td", "td_lambda", None]) def test_reinforce_value_net(self, advantage, gradient_mode, delay_value): n_obs = 3 n_act = 5 @@ -2691,13 +2731,14 @@ def test_reinforce_value_net(self, advantage, gradient_mode, delay_value): value_network=get_functional(value_net), differentiable=gradient_mode, ) + elif advantage is None: + pass else: raise NotImplementedError loss_fn = ReinforceLoss( actor_net, critic=value_net, - gamma=gamma, delay_value=delay_value, ) @@ -2714,12 +2755,9 @@ def test_reinforce_value_net(self, advantage, gradient_mode, delay_value): [batch], ) - with pytest.raises( - KeyError, match=re.escape('key "advantage" not found in TensorDict with') - ): - _ = loss_fn(td) params = TensorDict(value_net.state_dict(), []).unflatten_keys(".") - advantage(td, params=params) + if advantage is not None: + advantage(td, params=params) loss_td = loss_fn(td) autograd.grad( loss_td.get("loss_actor"), @@ -3270,7 +3308,6 @@ def test_iql( qvalue_network=qvalue, value_network=value, num_qvalue_nets=num_qvalue, - gamma=0.9, temperature=temperature, expectile=expectile, loss_function="l2", @@ -3385,7 +3422,6 @@ def test_iql_batcher( qvalue_network=qvalue, value_network=value, num_qvalue_nets=num_qvalue, - gamma=0.9, temperature=temperature, expectile=expectile, loss_function="l2", diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index 853c6c8970e..7bd1f92a1d1 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -635,7 +635,7 @@ def set_seed(self, seed: int, static_seed: bool = False) -> int: Args: seed (int): integer representing the seed to be used for the environment. - static_seed(bool, optional): if True, the seed is not incremented. + static_seed(bool, optional): if ``True``, the seed is not incremented. Defaults to False Returns: @@ -1263,7 +1263,7 @@ def set_seed(self, seed: int, static_seed: bool = False) -> int: Args: seed: integer representing the seed to be used for the environment. - static_seed (bool, optional): if True, the seed is not incremented. + static_seed (bool, optional): if ``True``, the seed is not incremented. Defaults to False Returns: @@ -1840,7 +1840,7 @@ class aSyncDataCollector(MultiaSyncDataCollector): the output TensorDict will be stored. For long trajectories, it may be necessary to store the data on a different. device than the one where the policy is stored. Default is None. - update_at_each_batch (bool): if True, the policy weights will be updated every time a batch of trajectories + update_at_each_batch (bool): if ``True``, the policy weights will be updated every time a batch of trajectories is collected. default=False diff --git a/torchrl/data/datasets/d4rl.py b/torchrl/data/datasets/d4rl.py index 087793937f3..d80ecdf74fa 100644 --- a/torchrl/data/datasets/d4rl.py +++ b/torchrl/data/datasets/d4rl.py @@ -47,7 +47,7 @@ class D4RLExperienceReplay(TensorDictReplayBuffer): using multithreading. transform (Transform, optional): Transform to be executed when sample() is called. To chain transforms use the :obj:`Compose` class. - split_trajs (bool, optional): if True, the trajectories will be split + split_trajs (bool, optional): if ``True``, the trajectories will be split along the first dimension and padded to have a matching shape. 
To split the trajectories, the ``"done"`` signal will be used, which is recovered via ``done = timeout | terminal``. In other words, diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py index 2ad9b3d65b9..0a20dc6dff7 100644 --- a/torchrl/data/replay_buffers/replay_buffers.py +++ b/torchrl/data/replay_buffers/replay_buffers.py @@ -561,7 +561,7 @@ class TensorDictPrioritizedReplayBuffer(TensorDictReplayBuffer): mini-batch of Tensor(s)/outputs. Used when using batched loading from a map-style dataset. pin_memory (bool, optional): whether pin_memory() should be called on - the rb samples. Default is :obj:`False`. + the rb samples. Default is ``False``. prefetch (int, optional): number of next batches to be prefetched using multithreading. transform (Transform, optional): Transform to be executed when sample() is called. diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py index 4d180d35b49..9fd0fab8af4 100644 --- a/torchrl/data/replay_buffers/samplers.py +++ b/torchrl/data/replay_buffers/samplers.py @@ -76,7 +76,7 @@ class SamplerWithoutReplacement(Sampler): """A data-consuming sampler that ensures that the same sample is not present in consecutive batches. Args: - drop_last (bool, optional): if True, the last incomplete sample (if any) will be dropped. + drop_last (bool, optional): if ``True``, the last incomplete sample (if any) will be dropped. If False, this last sample will be kept and (unlike with torch dataloaders) completed with other samples from a fresh indices permutation. diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py index 2614dddea4f..e07796028b7 100644 --- a/torchrl/data/tensor_specs.py +++ b/torchrl/data/tensor_specs.py @@ -2345,7 +2345,7 @@ def keys( :obj:`CompositeSpec(next=CompositeSpec(obs=None))` will lead to the keys :obj:`["next"]. Default is ``False``, i.e. nested keys will not be returned. - leaves_only (bool, optional): if :obj:`False`, the values returned + leaves_only (bool, optional): if ``False``, the values returned will contain every level of nesting, i.e. a :obj:`CompositeSpec(next=CompositeSpec(obs=None))` will lead to the keys :obj:`["next", ("next", "obs")]`. Default is ``False``. @@ -2367,7 +2367,7 @@ def items( :obj:`CompositeSpec(next=CompositeSpec(obs=None))` will lead to the keys :obj:`["next"]. Default is ``False``, i.e. nested keys will not be returned. - leaves_only (bool, optional): if :obj:`False`, the values returned + leaves_only (bool, optional): if ``False``, the values returned will contain every level of nesting, i.e. a :obj:`CompositeSpec(next=CompositeSpec(obs=None))` will lead to the keys :obj:`["next", ("next", "obs")]`. Default is ``False``. @@ -2395,7 +2395,7 @@ def values( :obj:`CompositeSpec(next=CompositeSpec(obs=None))` will lead to the keys :obj:`["next"]. Default is ``False``, i.e. nested keys will not be returned. - leaves_only (bool, optional): if :obj:`False`, the values returned + leaves_only (bool, optional): if ``False``, the values returned will contain every level of nesting, i.e. a :obj:`CompositeSpec(next=CompositeSpec(obs=None))` will lead to the keys :obj:`["next", ("next", "obs")]`. Default is ``False``. 
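The test updates above all exercise the new estimator-selection API through the same pattern: parametrize over ``ValueFunctions``, call ``make_value_function`` on the loss, and expect ``NotImplementedError`` for families the loss rejects (for instance GAE on the value-based losses). The helper below condenses that pattern as a sketch; ``check_estimators`` is a hypothetical name introduced here for illustration, not part of this patch.

import pytest

from torchrl.objectives.utils import ValueFunctions


def check_estimators(loss_fn, unsupported=(ValueFunctions.GAE,)):
    """Configure every estimator family on ``loss_fn``; unsupported ones must raise."""
    for td_est in list(ValueFunctions) + [None]:
        if td_est in unsupported:
            with pytest.raises(NotImplementedError):
                loss_fn.make_value_function(td_est)
            continue
        if td_est is not None:
            # hyperparameters default to default_value_kwargs(td_est) and can be
            # overridden, e.g. make_value_function(ValueFunctions.TDLambda, gamma=0.99, lmbda=0.95)
            loss_fn.make_value_function(td_est)
        # when td_est is None, the loss falls back to its _default_value_function()


# usage sketch: check_estimators(DQNLoss(actor, loss_function="l2", delay_value=True))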
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py index b6a66b44663..1672867e652 100644 --- a/torchrl/envs/common.py +++ b/torchrl/envs/common.py @@ -127,7 +127,7 @@ class EnvBase(nn.Module, metaclass=abc.ABCMeta): - reward_spec (TensorSpec): sampling spec of the rewards; - batch_size (torch.Size): number of environments contained in the instance; - device (torch.device): device where the env input and output are expected to live - - run_type_checks (bool): if True, the observation and reward dtypes + - run_type_checks (bool): if ``True``, the observation and reward dtypes will be compared against their respective spec and an exception will be raised if they don't match. Defaults to False. @@ -538,7 +538,7 @@ def set_seed( Args: seed (int): seed to be set - static_seed (bool, optional): if True, the seed is not incremented. + static_seed (bool, optional): if ``True``, the seed is not incremented. Defaults to False Returns: @@ -651,11 +651,11 @@ def rollout( actions will be called using :obj:`env.rand_step()` default = None callback (callable, optional): function to be called at each iteration with the given TensorDict. - auto_reset (bool, optional): if True, resets automatically the environment + auto_reset (bool, optional): if ``True``, resets automatically the environment if it is in a done state when the rollout is initiated. Default is :obj:`True`. - auto_cast_to_device (bool, optional): if True, the device of the tensordict is automatically cast to the - policy device before the policy is used. Default is :obj:`False`. + auto_cast_to_device (bool, optional): if ``True``, the device of the tensordict is automatically cast to the + policy device before the policy is used. Default is ``False``. break_when_any_done (bool): breaks if any of the done state is True. If False, a reset() is called on the sub-envs that are done. Default is True. return_contiguous (bool): if False, a LazyStackedTensorDict will be returned. Default is True. diff --git a/torchrl/envs/libs/dm_control.py b/torchrl/envs/libs/dm_control.py index 34bcfa2580a..9097c6ca1d2 100644 --- a/torchrl/envs/libs/dm_control.py +++ b/torchrl/envs/libs/dm_control.py @@ -116,7 +116,7 @@ class DMControlWrapper(GymLikeEnv): Args: env (dm_control.suite env): environment instance - from_pixels (bool): if True, the observation + from_pixels (bool): if ``True``, the observation Examples: >>> env = dm_control.suite.load("cheetah", "run") @@ -272,7 +272,7 @@ class DMControlEnv(DMControlWrapper): env_name (str): name of the environment task_name (str): name of the task seed (int, optional): seed to use for the environment - from_pixels (bool, optional): if True, the observation will be returned + from_pixels (bool, optional): if ``True``, the observation will be returned as an image. Default is False. diff --git a/torchrl/envs/libs/utils.py b/torchrl/envs/libs/utils.py index 84efb840db8..d157ee06d3c 100644 --- a/torchrl/envs/libs/utils.py +++ b/torchrl/envs/libs/utils.py @@ -37,7 +37,7 @@ class GymPixelObservationWrapper(ObservationWrapper): env: The environment to wrap. pixels_only: If :obj:`True` (default), the original observation returned by the wrapped environment will be discarded, and a dictionary - observation will only include pixels. If :obj:`False`, the + observation will only include pixels. If ``False``, the observation dictionary will contain both the original observations and the pixel observations. 
render_kwargs: Optional :obj:`dict` containing keyword arguments passed diff --git a/torchrl/envs/transforms/r3m.py b/torchrl/envs/transforms/r3m.py index 6f6db79edd9..938e67c08a9 100644 --- a/torchrl/envs/transforms/r3m.py +++ b/torchrl/envs/transforms/r3m.py @@ -215,7 +215,7 @@ class R3MTransform(Compose): argument will be treaded separetely and each will be given a single, separated entry in the output tensordict. Defaults to :obj:`True`. download (bool, torchvision Weights config or corresponding string): - if True, the weights will be downloaded using the torch.hub download + if ``True``, the weights will be downloaded using the torch.hub download API (i.e. weights will be cached for future use). These weights are the original weights from the R3M publication. If the torchvision weights are needed, there are two ways they can be diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py index a34d0bb8bed..6a0dd6be2b8 100644 --- a/torchrl/envs/transforms/transforms.py +++ b/torchrl/envs/transforms/transforms.py @@ -396,7 +396,7 @@ class TransformedEnv(EnvBase): transform (Transform, optional): transform to apply to the tensordict resulting from :obj:`env.step(td)`. If none is provided, an empty Compose placeholder in an eval mode is used. - cache_specs (bool, optional): if True, the specs will be cached once + cache_specs (bool, optional): if ``True``, the specs will be cached once and for all after the first call (i.e. the specs will be transformed_in only once). If the transform changes during training, the original spec transform may not be valid anymore, @@ -880,7 +880,7 @@ class ToTensorImage(ObservationTransform): with values between 0 and 1. Args: - unsqueeze (bool): if True, the observation tensor is unsqueezed + unsqueeze (bool): if ``True``, the observation tensor is unsqueezed along the first dimension. default=False. dtype (torch.dtype, optional): dtype to use for the resulting observations. @@ -1154,7 +1154,7 @@ class FlattenObservation(ObservationTransform): :obj:`["pixels"]` is assumed. out_keys (sequence of str, optional): the flatten observation keys. If none is provided, :obj:`in_keys` is assumed. - allow_positive_dim (bool, optional): if True, positive dimensions are accepted. + allow_positive_dim (bool, optional): if ``True``, positive dimensions are accepted. :obj:`FlattenObservation` will map these to the n^th feature dimension (ie n^th dimension after batch size of parent env) of the input tensor. Defaults to False, ie. non-negative dimensions are not permitted. @@ -1229,7 +1229,7 @@ class UnsqueezeTransform(Transform): Args: unsqueeze_dim (int): dimension to unsqueeze. Must be negative (or allow_positive_dim must be turned on). - allow_positive_dim (bool, optional): if True, positive dimensions are accepted. + allow_positive_dim (bool, optional): if ``True``, positive dimensions are accepted. :obj:`UnsqueezeTransform` will map these to the n^th feature dimension (ie n^th dimension after batch size of parent env) of the input tensor, independently from the tensordict batch size (ie positive dims may be @@ -1414,7 +1414,7 @@ class ObservationNorm(ObservationTransform): only the forward transform will be called. out_keys_inv (list of int, optional): output entries for the inverse transform. Defaults to the value of `in_keys_inv`. - standard_normal (bool, optional): if True, the transform will be + standard_normal (bool, optional): if ``True``, the transform will be .. 
math:: obs = (obs-loc)/scale @@ -1831,7 +1831,7 @@ class RewardScaling(Transform): Args: loc (number or torch.Tensor): location of the affine transform scale (number or torch.Tensor): scale of the affine transform - standard_normal (bool, optional): if True, the transform will be + standard_normal (bool, optional): if ``True``, the transform will be .. math:: reward = (reward-loc)/scale @@ -1993,9 +1993,9 @@ class CatTensors(Transform): out_key: key of the resulting tensor. dim (int, optional): dimension along which the concatenation will occur. Default is -1. - del_keys (bool, optional): if True, the input values will be deleted after + del_keys (bool, optional): if ``True``, the input values will be deleted after concatenation. Default is True. - unsqueeze_if_oor (bool, optional): if True, CatTensor will check that + unsqueeze_if_oor (bool, optional): if ``True``, CatTensor will check that the dimension indicated exist for the tensors to concatenate. If not, the tensors will be unsqueezed along that dimension. Default is False. @@ -2168,7 +2168,7 @@ class DiscreteActionProjection(Transform): num_actions_effective (int): max number of action considered. max_actions (int): maximum number of actions that this module can read. action_key (str, optional): key name of the action. Defaults to "action". - include_forward (bool, optional): if True, a call to forward will also + include_forward (bool, optional): if ``True``, a call to forward will also map the action from one domain to the other when the module is called by a replay buffer or an nn.Module chain. Defaults to True. @@ -2383,7 +2383,7 @@ class TensorDictPrimer(Transform): Args: primers (dict, optional): a dictionary containing key-spec pairs which will be used to populate the input tensordict. - random (bool, optional): if True, the values will be drawn randomly from + random (bool, optional): if ``True``, the values will be drawn randomly from the TensorSpec domain (or a unit Gaussian if unbounded). Otherwise a fixed value will be assumed. Defaults to `False`. default_value (float, optional): if non-random filling is chosen, this @@ -2771,7 +2771,7 @@ def build_td_for_shared_vecnorm( tensordict keys (iterable of str, optional): keys that have to be normalized. Default is `["next", "reward"]` - memmap (bool): if True, the resulting tensordict will be cast into + memmap (bool): if ``True``, the resulting tensordict will be cast into memmory map (using `memmap_()`). Otherwise, the tensordict will be placed in shared memory. diff --git a/torchrl/envs/transforms/vip.py b/torchrl/envs/transforms/vip.py index 4ba18177c56..2795439ef17 100644 --- a/torchrl/envs/transforms/vip.py +++ b/torchrl/envs/transforms/vip.py @@ -181,7 +181,7 @@ class VIPTransform(Compose): argument will be treaded separetely and each will be given a single, separated entry in the output tensordict. Defaults to :obj:`True`. download (bool, torchvision Weights config or corresponding string): - if True, the weights will be downloaded using the torch.hub download + if ``True``, the weights will be downloaded using the torch.hub download API (i.e. weights will be cached for future use). These weights are the original weights from the VIP publication. 
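# Sketch of the ``standard_normal`` convention documented above for the affine
# transforms: with ``standard_normal=True`` the map is
# ``value = (value - loc) / scale``. The numbers below are arbitrary.
from torchrl.envs.transforms import RewardScaling

scale_reward = RewardScaling(loc=0.0, scale=5.0, standard_normal=True)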
If the torchvision weights are needed, there are two ways they can be diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py index f8ff52d6af2..c0bc3e2b6c4 100644 --- a/torchrl/envs/utils.py +++ b/torchrl/envs/utils.py @@ -39,17 +39,17 @@ def step_mdp( Args: tensordict (TensorDictBase): tensordict with keys to be renamed next_tensordict (TensorDictBase, optional): destination tensordict - keep_other (bool, optional): if True, all keys that do not start with :obj:`'next_'` will be kept. + keep_other (bool, optional): if ``True``, all keys that do not start with :obj:`'next_'` will be kept. Default is ``True``. - exclude_reward (bool, optional): if True, the :obj:`"reward"` key will be discarded + exclude_reward (bool, optional): if ``True``, the :obj:`"reward"` key will be discarded from the resulting tensordict. If ``False``, it will be copied (and replaced) from the ``"next"`` entry (if present). Default is ``False``. - exclude_done (bool, optional): if True, the :obj:`"done"` key will be discarded + exclude_done (bool, optional): if ``True``, the :obj:`"done"` key will be discarded from the resulting tensordict. If ``False``, it will be copied (and replaced) from the ``"next"`` entry (if present). Default is ``False``. - exclude_action (bool, optional): if True, the :obj:`"action"` key will + exclude_action (bool, optional): if ``True``, the :obj:`"action"` key will be discarded from the resulting tensordict. If ``False``, it will be kept in the root tensordict (since it should not be present in the ``"next"`` entry). @@ -232,7 +232,7 @@ def check_env_specs(env, return_contiguous=True, check_dtype=True, seed=0): Args: env (EnvBase): the env for which the specs have to be checked against data. - return_contiguous (bool, optional): if True, the random rollout will be called with + return_contiguous (bool, optional): if ``True``, the random rollout will be called with return_contiguous=True. This will fail in some cases (e.g. heterogeneous shapes of inputs/outputs). Defaults to True. check_dtype (bool, optional): if False, dtype checks will be skipped. diff --git a/torchrl/envs/vec_env.py b/torchrl/envs/vec_env.py index 5250b78dccf..c519b591728 100644 --- a/torchrl/envs/vec_env.py +++ b/torchrl/envs/vec_env.py @@ -107,7 +107,7 @@ class _BatchedEnv(EnvBase): needed, which comes with a slight compute overhead; create_env_kwargs (dict or list of dicts, optional): kwargs to be used with the environments being created; pin_memory (bool): if True and device is "cpu", calls :obj:`pin_memory` on the tensordicts when created. - share_individual_td (bool, optional): if True, a different tensordict is created for every process/worker and a lazy + share_individual_td (bool, optional): if ``True``, a different tensordict is created for every process/worker and a lazy stack is returned. default = None (False if single task); shared_memory (bool): whether or not the returned tensordict will be placed in shared memory; @@ -119,9 +119,9 @@ class _BatchedEnv(EnvBase): It is assumed that all environments will run on the same device as a common shared tensordict will be used to pass data from process to process. The device can be changed after instantiation using :obj:`env.to(device)`. - allow_step_when_done (bool, optional): if True, batched environments can + allow_step_when_done (bool, optional): if ``True``, batched environments can execute steps after a done state is encountered. - Defaults to :obj:`False`. + Defaults to ``False``. 
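# Illustrative sketch of ``step_mdp`` with the flags documented above (the
# tensordict contents are hypothetical): the entries stored under "next"
# become the new root entries, and ``exclude_action=True`` drops the action.
import torch
from tensordict.tensordict import TensorDict
from torchrl.envs.utils import step_mdp

data = TensorDict(
    {
        "observation": torch.zeros(3),
        "action": torch.zeros(1),
        "next": TensorDict(
            {
                "observation": torch.ones(3),
                "reward": torch.ones(1),
                "done": torch.zeros(1, dtype=torch.bool),
            },
            [],
        ),
    },
    [],
)
data_tp1 = step_mdp(data, exclude_action=True)  # data_tp1["observation"] is the "next" observation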
""" diff --git a/torchrl/modules/models/models.py b/torchrl/modules/models/models.py index cefca9b595c..575c12daa74 100644 --- a/torchrl/modules/models/models.py +++ b/torchrl/modules/models/models.py @@ -172,7 +172,6 @@ def __init__( _out_features_num = out_features if not isinstance(out_features, Number): - print(out_features, type(out_features)) _out_features_num = prod(out_features) self.out_features = out_features self._out_features_num = _out_features_num @@ -725,7 +724,7 @@ class DdpgCnnActor(nn.Module): 'bias_last_layer': True, } use_avg_pooling (bool, optional): if ``True``, a nn.AvgPooling layer is - used to aggregate the output. Default is :obj:`False`. + used to aggregate the output. Default is ``False``. device (Optional[DEVICE_TYPING]): device to create the module on. """ diff --git a/torchrl/modules/tensordict_module/actors.py b/torchrl/modules/tensordict_module/actors.py index ad191e7d14c..635fc90ca21 100644 --- a/torchrl/modules/tensordict_module/actors.py +++ b/torchrl/modules/tensordict_module/actors.py @@ -52,7 +52,7 @@ class Actor(SafeModule): occur because of exploration policies or numerical under/overflow issues. If this value is out of bounds, it is projected back onto the desired space using the :obj:`TensorSpec.project` - method. Default is :obj:`False`. + method. Default is ``False``. Examples: >>> import torch @@ -142,7 +142,7 @@ class ProbabilisticActor(SafeProbabilisticTensorDictSequential): occur because of exploration policies or numerical under/overflow issues. If this value is out of bounds, it is projected back onto the desired space using the :obj:`TensorSpec.project` - method. Default is :obj:`False`. + method. Default is ``False``. default_interaction_mode (str, optional): keyword-only argument. Default method to be used to retrieve the output value. Should be one of: 'mode', 'median', 'mean' or 'random' @@ -586,7 +586,7 @@ class QValueActor(Actor): occur because of exploration policies or numerical under/overflow issues. If this value is out of bounds, it is projected back onto the desired space using the :obj:`TensorSpec.project` - method. Default is :obj:`False`. + method. Default is ``False``. action_space (str, optional): The action space to be considered. Must be one of ``"one-hot"``, ``"mult_one_hot"``, ``"binary"`` or ``"categorical"``. @@ -659,7 +659,7 @@ class DistributionalQValueActor(QValueActor): occur because of exploration policies or numerical under/overflow issues. If this value is out of bounds, it is projected back onto the desired space using the :obj:`TensorSpec.project` - method. Default is :obj:`False`. + method. Default is ``False``. support (torch.Tensor): support of the action values. action_space (str, optional): The action space to be considered. Must be one of diff --git a/torchrl/modules/tensordict_module/common.py b/torchrl/modules/tensordict_module/common.py index 07486627f2b..0b12eaa2e82 100644 --- a/torchrl/modules/tensordict_module/common.py +++ b/torchrl/modules/tensordict_module/common.py @@ -119,7 +119,7 @@ class SafeModule(TensorDictModule): occur because of exploration policies or numerical under/overflow issues. If this value is out of bounds, it is projected back onto the desired space using the :obj:`TensorSpec.project` - method. Default is :obj:`False`. + method. Default is ``False``. Embedding a neural network in a TensorDictModule only requires to specify the input and output keys. The domain spec can be passed along if needed. TensorDictModule support functional and regular :obj:`nn.Module` objects. 
In the functional diff --git a/torchrl/modules/tensordict_module/probabilistic.py b/torchrl/modules/tensordict_module/probabilistic.py index f139f652031..299a8621c77 100644 --- a/torchrl/modules/tensordict_module/probabilistic.py +++ b/torchrl/modules/tensordict_module/probabilistic.py @@ -65,7 +65,7 @@ class SafeProbabilisticModule(ProbabilisticTensorDictModule): check will only occur for the distribution sample, but not the other tensors returned by the input module. If the sample is out of bounds, it is projected back onto the desired space using the `TensorSpec.project` method. - Default is :obj:`False`. + Default is ``False``. default_interaction_mode (str, optional): default method to be used to retrieve the output value. Should be one of: 'mode', 'median', 'mean' or 'random' (in which case the value is sampled randomly from the distribution). Default diff --git a/torchrl/objectives/a2c.py b/torchrl/objectives/a2c.py index a44557396ca..644324416f3 100644 --- a/torchrl/objectives/a2c.py +++ b/torchrl/objectives/a2c.py @@ -11,7 +11,8 @@ from torch import distributions as d from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import distance_loss +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate class A2CLoss(LossModule): @@ -29,26 +30,47 @@ class A2CLoss(LossModule): critic (ValueOperator): value operator. advantage_key (str): the input tensordict key where the advantage is expected to be written. default: "advantage" - advantage_diff_key (str): the input tensordict key where advantage_diff is expected to be written. - default: "value_error" + value_target_key (str): the input tensordict key where the target state + value is expected to be written. Defaults to ``"value_target"``. + entropy_bonus (bool): if ``True``, an entropy bonus will be added to the + loss to favour exploratory policies. + samples_mc_entropy (int): if the distribution retrieved from the policy + operator does not have a closed form + formula for the entropy, a Monte-Carlo estimate will be used. + ``samples_mc_entropy`` will control how many + samples will be used to compute this estimate. + Defaults to ``1``. entropy_coef (float): the weight of the entropy loss. critic_coef (float): the weight of the critic loss. - gamma (scalar): a discount factor for return computation. - loss_function_type (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". - advantage_module (nn.Module): TensorDictModule used to compute tha advantage function. + loss_critic_type (str): loss function for the value discrepancy. + Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``. + + .. note: + The advantage (typically GAE) can be computed by the loss function or + in the training loop. The latter option is usually preferred, but this is + up to the user to choose which option is to be preferred. + If the advantage key (``"advantage`` by default) is not present in the + input tensordict, the advantage will be computed by the :meth:`~.forward` + method. + A custom advantage module can be built using :meth:`~.make_value_function`. + The default is :class:`torchrl.objectives.value.GAE` with hyperparameters + dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
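# Sketch (mirroring the PPO examples further down) of the behaviour described
# in the note above for A2CLoss: if the sampled batch has no "advantage"
# entry, forward() computes it with the default GAE estimator. ``actor``,
# ``critic`` and ``batch`` are assumed to exist.
from torchrl.objectives import A2CLoss

a2c_loss = A2CLoss(actor, critic)
losses = a2c_loss(batch)  # "advantage" missing from batch -> GAE is computed inside forward()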
+ """ + default_value_type: ValueFunctions = ValueFunctions.GAE + def __init__( self, actor: ProbabilisticTensorDictSequential, critic: TensorDictModule, + *, advantage_key: str = "advantage", value_target_key: str = "value_target", entropy_bonus: bool = True, samples_mc_entropy: int = 1, entropy_coef: float = 0.01, critic_coef: float = 1.0, - gamma: float = 0.99, loss_critic_type: str = "smooth_l1", ): super().__init__() @@ -66,7 +88,6 @@ def __init__( self.register_buffer( "critic_coef", torch.tensor(critic_coef, device=self.device) ) - self.register_buffer("gamma", torch.tensor(gamma, device=self.device)) self.loss_critic_type = loss_critic_type def reset(self) -> None: @@ -96,6 +117,8 @@ def _log_probs( def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: try: + # TODO: if the advantage is gathered by forward, this introduces an + # overhead that we could easily reduce. target_return = tensordict.get(self.value_target_key) tensordict_select = tensordict.select(*self.critic.in_keys) state_value = self.critic( @@ -118,8 +141,15 @@ def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: return self.critic_coef * loss_value def forward(self, tensordict: TensorDictBase) -> TensorDictBase: - tensordict = tensordict.clone() - advantage = tensordict.get(self.advantage_key) + tensordict = tensordict.clone(False) + advantage = tensordict.get(self.advantage_key, None) + if advantage is None: + self.value_function( + tensordict, + params=self.critic_params, + target_params=self.target_critic_params, + ) + advantage = tensordict.get(self.advantage_key) log_probs, dist = self._log_probs(tensordict) loss = -(log_probs * advantage) td_out = TensorDict({"loss_objective": loss.mean()}, []) @@ -131,3 +161,26 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: loss_critic = self.loss_critic(tensordict).mean() td_out.set("loss_critic", loss_critic.mean()) return td_out + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_value" + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.GAE: + self._value_function = GAE( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=self.critic, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index a105b23da98..1cbddfb4deb 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -47,8 +47,15 @@ class LossModule(nn.Module): the various loss values throughout training. Other scalars present in the output tensordict will be logged too. + :cvar defaylt_value_type: The default value type of the class. + Losses that require a value estimation are equipped with a default value + pointer. This class attribute indicates which value estimator will be + used if none other is specified. + The value estimator can be changed using the :meth:`~.make_value_function` method. 
""" + default_value_type: ValueFunctions = None + def __init__(self): super().__init__() self._param_maps = {} @@ -358,6 +365,7 @@ def cpu(self) -> LossModule: @property def value_function(self) -> ValueFunctionBase: + """The value function blends in the reward and value estimate(s) from upcoming state(s)/state-action pair(s) into a target value estimate for the value network.""" out = self._value_function if out is None: self._default_value_function() @@ -375,7 +383,7 @@ def _default_value_function(self): from :obj:`torchrl.objectives.utils.DEFAULT_VALUE_FUN_PARAMS`. """ - raise NotImplementedError + self.make_value_function(self.default_value_type) def make_value_function(self, value_type: ValueFunctions, **hyperparams): """Value-function constructor. diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py index 42dea6244fa..980c0e3b8c9 100644 --- a/torchrl/objectives/ddpg.py +++ b/torchrl/objectives/ddpg.py @@ -14,10 +14,16 @@ from tensordict.tensordict import TensorDict, TensorDictBase from torchrl.modules.tensordict_module.actors import ActorCriticWrapper -from torchrl.objectives.utils import distance_loss, hold_out_params, next_state_value +from torchrl.objectives.utils import ( + default_value_kwargs, + distance_loss, + hold_out_params, + ValueFunctions, +) from ..envs.utils import set_exploration_mode from .common import LossModule +from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate class DDPGLoss(LossModule): @@ -26,21 +32,20 @@ class DDPGLoss(LossModule): Args: actor_network (TensorDictModule): a policy operator. value_network (TensorDictModule): a Q value operator. - gamma (scalar): a discount factor for return computation. - device (str, int or torch.device, optional): a device where the losses will be computed, if it can't be found - via the value operator. loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". delay_actor (bool, optional): whether to separate the target actor networks from the actor networks used for - data collection. Default is :obj:`False`. + data collection. Default is ``False``. delay_value (bool, optional): whether to separate the target value networks from the value networks used for - data collection. Default is :obj:`False`. + data collection. Default is ``False``. 
""" + default_value_type: ValueFunctions = ValueFunctions.TD0 + def __init__( self, actor_network: TensorDictModule, value_network: TensorDictModule, - gamma: float, + *, loss_function: str = "l2", delay_actor: bool = False, delay_value: bool = False, @@ -71,7 +76,6 @@ def __init__( self.actor_in_keys = actor_network.in_keys - self.register_buffer("gamma", torch.tensor(gamma)) self.loss_funtion = loss_function def forward(self, input_tensordict: TensorDictBase) -> TensorDict: @@ -147,7 +151,6 @@ def _loss_value( ) pred_val = td_copy.get("state_action_value").squeeze(-1) - actor_critic = self.actor_critic target_params = TensorDict( { "module": { @@ -159,12 +162,9 @@ def _loss_value( device=self.target_actor_network_params.device, ) with set_exploration_mode("mode"): - target_value = next_state_value( - tensordict, - actor_critic, - gamma=self.gamma, - params=target_params, - ) + target_value = self.value_function.value_estimate( + tensordict, target_params=target_params + ).squeeze(-1) # td_error = pred_val - target_value loss_value = distance_loss( @@ -172,3 +172,26 @@ def _loss_value( ) return loss_value, (pred_val - target_value).pow(2), pred_val, target_value + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_action_value" + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=self.actor_critic, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/deprecated.py b/torchrl/objectives/deprecated.py index 6fd5d01ab97..97b7aab7e5e 100644 --- a/torchrl/objectives/deprecated.py +++ b/torchrl/objectives/deprecated.py @@ -16,11 +16,13 @@ from torch import Tensor from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives import ( + default_value_kwargs, distance_loss, hold_out_params, - next_state_value as get_next_state_value, + ValueFunctions, ) from torchrl.objectives.common import LossModule +from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -41,36 +43,49 @@ class REDQLoss_deprecated(LossModule): Args: actor_network (TensorDictModule): the actor to be trained - qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. - num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. - sub_sample_len (int, optional): number of Q-value networks to be subsampled to evaluate the next state value - Default is 2. - gamma (Number, optional): gamma decay factor. Default is 0.99. - priotity_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is - `"td_error"`. - loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", - "l1", Default is "smooth_l1". + qvalue_network (TensorDictModule): a single Q-value network that will + be multiplicated as many times as needed. 
+ num_qvalue_nets (int, optional): Number of Q-value networks to be trained. + Default is ``10``. + sub_sample_len (int, optional): number of Q-value networks to be + subsampled to evaluate the next state value + Default is ``2``. + priority_key (str, optional): Key where to write the priority value + for prioritized replay buffers. Default is + ``"td_error"``. + loss_function (str, optional): loss function to be used for the Q-value. + Can be one of ``"smooth_l1"``, ``"l2"``, + ``"l1"``, Default is ``"smooth_l1"``. alpha_init (float, optional): initial entropy multiplier. - Default is 1.0. + Default is ``1.0``. min_alpha (float, optional): min value of alpha. - Default is 0.1. + Default is ``0.1``. max_alpha (float, optional): max value of alpha. - Default is 10.0. - fixed_alpha (bool, optional): whether alpha should be trained to match a target entropy. Default is :obj:`False`. - target_entropy (Union[str, Number], optional): Target entropy for the stochastic policy. Default is "auto". + Default is ``10.0``. + fixed_alpha (bool, optional): whether alpha should be trained to match + a target entropy. Default is ``False``. + target_entropy (Union[str, Number], optional): Target entropy for the + stochastic policy. Default is "auto". + delay_qvalue (bool, optional): Whether to separate the target Q value + networks from the Q value networks used + for data collection. Default is ``False``. + gSDE (bool, optional): Knowing if gSDE is used is necessary to create + random noise variables. + Default is ``False``. """ delay_actor: bool = False + default_value_type = ValueFunctions.TD0 def __init__( self, actor_network: TensorDictModule, qvalue_network: TensorDictModule, + *, num_qvalue_nets: int = 10, sub_sample_len: int = 2, - gamma: Number = 0.99, - priotity_key: str = "td_error", + priority_key: str = "td_error", loss_function: str = "smooth_l1", alpha_init: float = 1.0, min_alpha: float = 0.1, @@ -102,8 +117,7 @@ def __init__( ) self.num_qvalue_nets = num_qvalue_nets self.sub_sample_len = max(1, min(sub_sample_len, num_qvalue_nets - 1)) - self.register_buffer("gamma", torch.tensor(gamma)) - self.priority_key = priotity_key + self.priority_key = priority_key self.loss_function = loss_function try: @@ -197,7 +211,7 @@ def _qvalue_loss(self, tensordict: TensorDictBase) -> Tensor: tensordict_save = tensordict obs_keys = self.actor_network.in_keys - tensordict = tensordict.select("next", *obs_keys, "action") + tensordict = tensordict.clone(False).select("next", *obs_keys, "action") selected_models_idx = torch.randperm(self.num_qvalue_nets)[ : self.sub_sample_len @@ -227,17 +241,13 @@ def _qvalue_loss(self, tensordict: TensorDictBase) -> Tensor: != sample_log_prob.shape ): sample_log_prob = sample_log_prob.unsqueeze(-1) - state_value = ( + next_state_value = ( next_td.get("state_action_value") - self.alpha * sample_log_prob ) - state_value = state_value.min(0)[0] + next_state_value = next_state_value.min(0)[0] - tensordict.set("next.state_value", state_value) - target_value = get_next_state_value( - tensordict, - gamma=self.gamma, - pred_next_val=state_value, - ) + tensordict.set(("next", "state_value"), next_state_value) + target_value = self.value_function.value_estimate(tensordict).squeeze(-1) tensordict_expand = vmap(self.qvalue_network, (None, 0))( tensordict.select(*self.qvalue_network.in_keys), self.qvalue_network_params, @@ -265,6 +275,28 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: alpha_loss = torch.zeros_like(log_pi) return alpha_loss + def make_value_function(self, value_type: 
ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_value" + # we do not need a value network bc the next state value is already passed + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=None, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=None, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.GAE: + self._value_function = GAE(value_network=None, value_key=value_key, **hp) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=None, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") + class DoubleREDQLoss_deprecated(REDQLoss_deprecated): """[Deprecated] Class for delayed target-REDQ (which should be the default behaviour).""" diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 9792c782b0d..66ab0aba5c4 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -26,13 +26,16 @@ class DQNLoss(LossModule): value_network (QValueActor or nn.Module): a Q value operator. loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". delay_value (bool, optional): whether to duplicate the value network into a new target value network to - create a double DQN. Default is :obj:`False`. + create a double DQN. Default is ``False``. """ + default_value_type = ValueFunctions.TDLambda + def __init__( self, value_network: Union[QValueActor, nn.Module], + *, loss_function: str = "l2", priority_key: str = "td_error", delay_value: bool = False, @@ -76,12 +79,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key="chosen_action_value", ) elif value_type is ValueFunctions.GAE: - self._value_function = GAE( - **hp, - value_network=self.value_network, - advantage_key="advantage", - value_target_key="value_target", - value_key="chosen_action_value", + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type is ValueFunctions.TDLambda: self._value_function = TDLambdaEstimate( @@ -94,9 +93,6 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): else: raise NotImplementedError(f"Unknown value type {value_type}") - def _default_value_function(self): - self.make_value_function(ValueFunctions.TDLambda) - def forward(self, input_tensordict: TensorDictBase) -> TensorDict: """Computes the DQN loss given a tensordict sampled from the replay buffer. diff --git a/torchrl/objectives/dreamer.py b/torchrl/objectives/dreamer.py index 885558390c3..47f13e4ae90 100644 --- a/torchrl/objectives/dreamer.py +++ b/torchrl/objectives/dreamer.py @@ -11,15 +11,22 @@ from torchrl.envs.model_based.dreamer import DreamerEnv from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import distance_loss, hold_out_net -from torchrl.objectives.value.functional import vec_td_lambda_return_estimate +from torchrl.objectives.utils import ( + default_value_kwargs, + distance_loss, + hold_out_net, + ValueFunctions, +) +from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate class DreamerModelLoss(LossModule): """Dreamer Model Loss. - Computes the loss of the dreamer world model. 
The loss is composed of the kl divergence between the prior and posterior of the RSSM, - the reconstruction loss over the reconstructed observation and the reward loss over the predicted reward. + Computes the loss of the dreamer world model. The loss is composed of the + kl divergence between the prior and posterior of the RSSM, + the reconstruction loss over the reconstructed observation and the reward + loss over the predicted reward. Reference: https://arxiv.org/abs/1912.01603. @@ -31,10 +38,10 @@ class DreamerModelLoss(LossModule): reco_loss (str, optional): the reconstruction loss. Default: "l2". reward_loss (str, optional): the reward loss. Default: "l2". free_nats (int, optional): the free nats. Default: 3. - delayed_clamp (bool, optional): if True, the KL clamping occurs after + delayed_clamp (bool, optional): if ``True``, the KL clamping occurs after averaging. If False (default), the kl divergence is clamped to the free nats value first and then averaged. - global_average (bool, optional): if True, the losses will be averaged + global_average (bool, optional): if ``True``, the losses will be averaged over all dimensions. Otherwise, a sum will be performed over all non-batch/time dimensions and an average over batch and time. Default: False. @@ -43,6 +50,7 @@ class DreamerModelLoss(LossModule): def __init__( self, world_model: TensorDictModule, + *, lambda_kl: float = 1.0, lambda_reco: float = 1.0, lambda_reward: float = 1.0, @@ -129,7 +137,8 @@ def kl_loss( class DreamerActorLoss(LossModule): """Dreamer Actor Loss. - Computes the loss of the dreamer actor. The actor loss is computed as the negative average lambda return. + Computes the loss of the dreamer actor. The actor loss is computed as the + negative average lambda return. Reference: https://arxiv.org/abs/1912.01603. @@ -138,22 +147,21 @@ class DreamerActorLoss(LossModule): value_model (TensorDictModule): the value model. model_based_env (DreamerEnv): the model based environment. imagination_horizon (int, optional): The number of steps to unroll the - model. Default: 15. - gamma (float, optional): the gamma discount factor. Default: 0.99. - lmbda (float, optional): the lambda discount factor factor. Default: 0.95. - discount_loss (bool, optional): if True, the loss is discounted with a - gamma discount factor. Default: False. + model. Defaults to ``15``. + discount_loss (bool, optional): if ``True``, the loss is discounted with a + gamma discount factor. Default to ``False``. 
""" + default_value_type = ValueFunctions.TDLambda + def __init__( self, actor_model: TensorDictModule, value_model: TensorDictModule, model_based_env: DreamerEnv, + *, imagination_horizon: int = 15, - gamma: int = 0.99, - lmbda: int = 0.95, discount_loss: bool = False, # for consistency with paper ): super().__init__() @@ -161,8 +169,6 @@ def __init__( self.value_model = value_model self.model_based_env = model_based_env self.imagination_horizon = imagination_horizon - self.gamma = gamma - self.lmbda = lmbda self.discount_loss = discount_loss def forward(self, tensordict: TensorDict) -> Tuple[TensorDict, TensorDict]: @@ -192,9 +198,8 @@ def forward(self, tensordict: TensorDict) -> Tuple[TensorDict, TensorDict]: fake_data.set("lambda_target", lambda_target) if self.discount_loss: - discount = self.gamma * torch.ones_like( - lambda_target, device=tensordict.device - ) + gamma = self.value_function.gamma.to(tensordict.device) + discount = gamma.expand(lambda_target.shape) discount[..., 0, :] = 1 discount = discount.cumprod(dim=-2) actor_loss = -(lambda_target * discount).sum((-2, -1)).mean() @@ -205,15 +210,58 @@ def forward(self, tensordict: TensorDict) -> Tuple[TensorDict, TensorDict]: def lambda_target(self, reward: torch.Tensor, value: torch.Tensor) -> torch.Tensor: done = torch.zeros(reward.shape, dtype=torch.bool, device=reward.device) - return vec_td_lambda_return_estimate( - self.gamma, self.lmbda, value, reward, done + input_tensordict = TensorDict( + { + ("next", "reward"): reward, + ("next", "state_value"): value, + ("next", "done"): done, + }, + [], ) + return self.value_function.value_estimate(input_tensordict) + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + value_net = None + value_key = "state_value" + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + if value_type is ValueFunctions.TD1: + self._value_function = TD1Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TD0: + self._value_function = TD0Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.GAE: + self._value_function = GAE( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") class DreamerValueLoss(LossModule): """Dreamer Value Loss. - Computes the loss of the dreamer value model. The value loss is computed between the predicted value and the lambda target. + Computes the loss of the dreamer value model. The value loss is computed + between the predicted value and the lambda target. Reference: https://arxiv.org/abs/1912.01603. @@ -221,7 +269,7 @@ class DreamerValueLoss(LossModule): value_model (TensorDictModule): the value model. value_loss (str, optional): the loss to use for the value loss. Default: "l2". gamma (float, optional): the gamma discount factor. Default: 0.99. - discount_loss (bool, optional): if True, the loss is discounted with a + discount_loss (bool, optional): if ``True``, the loss is discounted with a gamma discount factor. Default: False. 
""" diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py index 83408fbd170..f817af1b904 100644 --- a/torchrl/objectives/iql.py +++ b/torchrl/objectives/iql.py @@ -3,7 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from numbers import Number from typing import Optional, Tuple import torch @@ -12,10 +11,11 @@ from torch import Tensor from torchrl.modules import ProbabilisticActor -from torchrl.objectives.utils import distance_loss, next_state_value +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions -from ..envs.utils import set_exploration_mode, step_mdp +from ..envs.utils import set_exploration_mode from .common import LossModule +from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -35,14 +35,9 @@ class IQLLoss(LossModule): Args: actor_network (ProbabilisticActor): stochastic actor qvalue_network (TensorDictModule): Q(s, a) parametric model - value_network (TensorDictModule, optional): V(s) parametric model. If not - provided, the second version of SAC is assumed. - qvalue_network_bis (ProbabilisticTDModule, optional): if required, the - Q-value can be computed twice independently using two separate - networks. The minimum predicted value will then be used for - inference. - gamma (number, optional): discount for return computation - Default is 0.99 + value_network (TensorDictModule, optional): V(s) parametric model. + num_qvalue_nets (integer, optional): number of Q-Value networks used. + Defaults to ``2``. priority_key (str, optional): tensordict key where to write the priority (for prioritized replay buffer usage). Default is `"td_error"`. @@ -57,14 +52,16 @@ class IQLLoss(LossModule): """ + default_value_type = ValueFunctions.TD0 + def __init__( self, actor_network: ProbabilisticActor, qvalue_network: TensorDictModule, - value_network: Optional[TensorDictModule] = None, + value_network: Optional[TensorDictModule], + *, num_qvalue_nets: int = 2, - gamma: Number = 0.99, - priotity_key: str = "td_error", + priority_key: str = "td_error", loss_function: str = "smooth_l1", temperature: float = 1.0, expectile: float = 0.5, @@ -106,8 +103,7 @@ def __init__( + list(value_network.parameters()), ) - self.register_buffer("gamma", torch.tensor(gamma)) - self.priority_key = priotity_key + self.priority_key = priority_key self.loss_function = loss_function @property @@ -218,26 +214,9 @@ def _loss_qvalue(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: obs_keys = self.actor_network.in_keys tensordict = tensordict.select("next", *obs_keys, "action") - with torch.no_grad(): - next_td = step_mdp(tensordict).select( - *self.actor_network.in_keys - ) # next_observation -> - # observation - # select pseudo-action - # get state values - next_td = self.value_network( - next_td, - params=self.value_network_params, - ) - - state_value = next_td.get("state_value") - - tensordict.set("next.state_value", state_value) - target_value = next_state_value( - tensordict, - gamma=self.gamma, - pred_next_val=state_value, - ) + target_value = self.value_function.value_estimate( + tensordict, target_params=self.target_value_network_params + ).squeeze(-1) tensordict_expand = vmap(self.qvalue_network, (None, 0))( tensordict.select(*self.qvalue_network.in_keys), self.qvalue_network_params, @@ -254,3 +233,40 @@ def _loss_qvalue(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: .mean() ) return loss_qval, 
td_error.detach().max(0)[0] + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + value_net = self.value_network + + value_key = "state_value" + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + if value_type is ValueFunctions.TD1: + self._value_function = TD1Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TD0: + self._value_function = TD0Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.GAE: + self._value_function = GAE( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py index 0ac84fd5daa..477cf3f5765 100644 --- a/torchrl/objectives/ppo.py +++ b/torchrl/objectives/ppo.py @@ -11,21 +11,25 @@ from tensordict.tensordict import TensorDict, TensorDictBase from torch import distributions as d -from torchrl.objectives.utils import distance_loss +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions from .common import LossModule +from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate class PPOLoss(LossModule): """A parent PPO loss class. - PPO (Proximal Policy Optimisation) is a model-free, online RL algorithm that makes use of a recorded (batch of) - trajectories to perform several optimization steps, while actively preventing the updated policy to deviate too + PPO (Proximal Policy Optimisation) is a model-free, online RL algorithm + that makes use of a recorded (batch of) + trajectories to perform several optimization steps, while actively + preventing the updated policy to deviate too much from its original parameter configuration. - PPO loss can be found in different flavours, depending on the way the constrained optimisation is implemented: - ClipPPOLoss and KLPENPPOLoss. - Unlike its subclasses, this class does not implement any regularisation and should therefore be used cautiously. + PPO loss can be found in different flavours, depending on the way the + constrained optimisation is implemented: ClipPPOLoss and KLPENPPOLoss. + Unlike its subclasses, this class does not implement any regularisation + and should therefore be used cautiously. For more details regarding PPO, refer to: "Proximal Policy Optimization Algorithms", https://arxiv.org/abs/1707.06347 @@ -33,35 +37,68 @@ class PPOLoss(LossModule): Args: actor (ProbabilisticTensorDictSequential): policy operator. critic (ValueOperator): value operator. - advantage_key (str): the input tensordict key where the advantage is expected to be written. - default: "advantage" - entropy_bonus (bool): if True, an entropy bonus will be added to the loss to favour exploratory policies. - samples_mc_entropy (int): if the distribution retrieved from the policy operator does not have a closed form - formula for the entropy, a Monte-Carlo estimate will be used. samples_mc_entropy will control how many + advantage_key (str): the input tensordict key where the advantage is + expected to be written. + Defaults to ``"advantage"``. 
+ value_target_key (str): the input tensordict key where the target state + value is expected to be written. Defaults to ``"value_target"``. + entropy_bonus (bool): if ``True``, an entropy bonus will be added to the + loss to favour exploratory policies. + samples_mc_entropy (int): if the distribution retrieved from the policy + operator does not have a closed form + formula for the entropy, a Monte-Carlo estimate will be used. + ``samples_mc_entropy`` will control how many samples will be used to compute this estimate. - default: 1 + Defaults to ``1``. entropy_coef (scalar): entropy multiplier when computing the total loss. - default: 0.01 - critic_coef (scalar): critic loss multiplier when computing the total loss. - default: 1.0 - gamma (scalar): a discount factor for return computation. - loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". - normalize_advantage (bool): if True, the advantage will be normalized before being used. - Defaults to False. + Defaults to ``0.01``. + critic_coef (scalar): critic loss multiplier when computing the total + loss. Defaults to ``1.0``. + loss_critic_type (str): loss function for the value discrepancy. + Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``. + normalize_advantage (bool): if ``True``, the advantage will be normalized + before being used. Defaults to ``False``. + + .. note: + The advantage (typically GAE) can be computed by the loss function or + in the training loop. The latter option is usually preferred, but this is + up to the user to choose which option is to be preferred. + If the advantage key (``"advantage`` by default) is not present in the + input tensordict, the advantage will be computed by the :meth:`~.forward` + method. + + >>> ppo_loss = PPOLoss(actor, critic) + >>> advantage = GAE(critic) + >>> data = next(datacollector) + >>> losses = ppo_loss(data) + >>> # equivalent + >>> advantage(data) + >>> losses = ppo_loss(data) + + A custom advantage module can be built using :meth:`~.make_value_function`. + The default is :class:`torchrl.objectives.value.GAE` with hyperparameters + dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. + + >>> ppo_loss = PPOLoss(actor, critic) + >>> ppo_loss.make_value_function(ValueFunctions.TDLambda) + >>> data = next(datacollector) + >>> losses = ppo_loss(data) """ + default_value_type = ValueFunctions.GAE + def __init__( self, actor: ProbabilisticTensorDictSequential, critic: TensorDictModule, + *, advantage_key: str = "advantage", value_target_key: str = "value_target", entropy_bonus: bool = True, samples_mc_entropy: int = 1, entropy_coef: float = 0.01, critic_coef: float = 1.0, - gamma: float = 0.99, loss_critic_type: str = "smooth_l1", normalize_advantage: bool = False, ): @@ -82,7 +119,6 @@ def __init__( self.register_buffer( "critic_coef", torch.tensor(critic_coef, device=self.device) ) - self.register_buffer("gamma", torch.tensor(gamma, device=self.device)) self.loss_critic_type = loss_critic_type self.normalize_advantage = normalize_advantage @@ -117,6 +153,8 @@ def _log_weight( return log_weight, dist def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: + # TODO: if the advantage is gathered by forward, this introduces an + # overhead that we could easily reduce. 
try: target_return = tensordict.get(self.value_target_key) tensordict_select = tensordict.select(*self.critic.in_keys) @@ -141,7 +179,14 @@ def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict = tensordict.clone(False) - advantage = tensordict.get(self.advantage_key) + advantage = tensordict.get(self.advantage_key, None) + if advantage is None: + self.value_function( + tensordict, + params=self.critic_params, + target_params=self.target_critic_params, + ) + advantage = tensordict.get(self.advantage_key) if self.normalize_advantage and advantage.numel() > 1: loc = advantage.mean().item() scale = advantage.std().clamp_min(1e-6).item() @@ -159,6 +204,29 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: td_out.set("loss_critic", loss_critic.mean()) return td_out + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_value" + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.GAE: + self._value_function = GAE( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=self.critic, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") + class ClipPPOLoss(PPOLoss): """Clipped PPO loss. @@ -170,22 +238,52 @@ class ClipPPOLoss(PPOLoss): actor (ProbabilisticTensorDictSequential): policy operator. critic (ValueOperator): value operator. advantage_key (str): the input tensordict key where the advantage is expected to be written. - default: "advantage" + Defaults to ``"advantage"``. + value_target_key (str): the input tensordict key where the target state + value is expected to be written. Defaults to ``"value_target"``. clip_epsilon (scalar): weight clipping threshold in the clipped PPO loss equation. default: 0.2 - entropy_bonus (bool): if True, an entropy bonus will be added to the loss to favour exploratory policies. - samples_mc_entropy (int): if the distribution retrieved from the policy operator does not have a closed form - formula for the entropy, a Monte-Carlo estimate will be used. samples_mc_entropy will control how many + entropy_bonus (bool): if ``True``, an entropy bonus will be added to the + loss to favour exploratory policies. + samples_mc_entropy (int): if the distribution retrieved from the policy + operator does not have a closed form + formula for the entropy, a Monte-Carlo estimate will be used. + ``samples_mc_entropy`` will control how many samples will be used to compute this estimate. - default: 1 + Defaults to ``1``. entropy_coef (scalar): entropy multiplier when computing the total loss. - default: 0.01 - critic_coef (scalar): critic loss multiplier when computing the total loss. - default: 1.0 - gamma (scalar): a discount factor for return computation. - loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". - normalize_advantage (bool): if True, the advantage will be normalized before being used. - Defaults to True. + Defaults to ``0.01``. 
+ critic_coef (scalar): critic loss multiplier when computing the total + loss. Defaults to ``1.0``. + loss_critic_type (str): loss function for the value discrepancy. + Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``. + normalize_advantage (bool): if ``True``, the advantage will be normalized + before being used. Defaults to ``False``. + + .. note: + The advantage (typically GAE) can be computed by the loss function or + in the training loop. The latter option is usually preferred, but this is + up to the user to choose which option is to be preferred. + If the advantage key (``"advantage`` by default) is not present in the + input tensordict, the advantage will be computed by the :meth:`~.forward` + method. + + >>> ppo_loss = ClipPPOLoss(actor, critic) + >>> advantage = GAE(critic) + >>> data = next(datacollector) + >>> losses = ppo_loss(data) + >>> # equivalent + >>> advantage(data) + >>> losses = ppo_loss(data) + + A custom advantage module can be built using :meth:`~.make_value_function`. + The default is :class:`torchrl.objectives.value.GAE` with hyperparameters + dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. + + >>> ppo_loss = ClipPPOLoss(actor, critic) + >>> ppo_loss.make_value_function(ValueFunctions.TDLambda) + >>> data = next(datacollector) + >>> losses = ppo_loss(data) """ @@ -193,13 +291,13 @@ def __init__( self, actor: ProbabilisticTensorDictSequential, critic: TensorDictModule, + *, advantage_key: str = "advantage", clip_epsilon: float = 0.2, entropy_bonus: bool = True, samples_mc_entropy: int = 1, entropy_coef: float = 0.01, critic_coef: float = 1.0, - gamma: float = 0.99, loss_critic_type: str = "smooth_l1", normalize_advantage: bool = True, **kwargs, @@ -207,12 +305,11 @@ def __init__( super(ClipPPOLoss, self).__init__( actor, critic, - advantage_key, + advantage_key=advantage_key, entropy_bonus=entropy_bonus, samples_mc_entropy=samples_mc_entropy, entropy_coef=entropy_coef, critic_coef=critic_coef, - gamma=gamma, loss_critic_type=loss_critic_type, normalize_advantage=normalize_advantage, **kwargs, @@ -228,7 +325,14 @@ def _clip_bounds(self): def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict = tensordict.clone(False) - advantage = tensordict.get(self.advantage_key) + advantage = tensordict.get(self.advantage_key, None) + if advantage is None: + self.value_function( + tensordict, + params=self.critic_params, + target_params=self.target_critic_params, + ) + advantage = tensordict.get(self.advantage_key) log_weight, dist = self._log_weight(tensordict) # ESS for logging with torch.no_grad(): @@ -278,28 +382,61 @@ class KLPENPPOLoss(PPOLoss): Args: actor (ProbabilisticTensorDictSequential): policy operator. critic (ValueOperator): value operator. - advantage_key (str): the input tensordict key where the advantage is expected to be written. - default: "advantage" - dtarg (scalar): target KL divergence. - beta (scalar): initial KL divergence multiplier. - default: 1.0 - increment (scalar): how much beta should be incremented if KL > dtarg. Valid range: increment >= 1.0 - default: 2.0 - decrement (scalar): how much beta should be decremented if KL < dtarg. Valid range: decrement <= 1.0 - default: 0.5 - entropy_bonus (bool): if True, an entropy bonus will be added to the loss to favour exploratory policies. - samples_mc_entropy (int): if the distribution retrieved from the policy operator does not have a closed form - formula for the entropy, a Monte-Carlo estimate will be used. 
samples_mc_entropy will control how many + advantage_key (str, optional): the input tensordict key where the advantage is expected to be written. + Defaults to ``"advantage"``. + value_target_key (str, optional): the input tensordict key where the target state + value is expected to be written. Defaults to ``"value_target"``. + dtarg (scalar, optional): target KL divergence. Defaults to ``0.01``. + samples_mc_kl (int, optional): number of samples used to compute the KL divergence + if no analytical formula can be found. Defaults to ``1``. + beta (scalar, optional): initial KL divergence multiplier. + Defaults to ``1.0``. + decrement (scalar, optional): how much beta should be decremented if KL < dtarg. Valid range: decrement <= 1.0 + default: ``0.5``. + increment (scalar, optional): how much beta should be incremented if KL > dtarg. Valid range: increment >= 1.0 + default: ``2.0``. + entropy_bonus (bool, optional): if ``True``, an entropy bonus will be added to the + loss to favour exploratory policies. Defaults to ``True``. + samples_mc_entropy (int, optional): if the distribution retrieved from the policy + operator does not have a closed form + formula for the entropy, a Monte-Carlo estimate will be used. + ``samples_mc_entropy`` will control how many samples will be used to compute this estimate. - default: 1 - entropy_coef (scalar): entropy multiplier when computing the total loss. - default: 0.01 - critic_coef (scalar): critic loss multiplier when computing the total loss. - default: 1.0 - gamma (scalar): a discount factor for return computation. - loss_critic_type (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1". - normalize_advantage (bool): if True, the advantage will be normalized before being used. - Defaults to True. + Defaults to ``1``. + entropy_coef (scalar, optional): entropy multiplier when computing the total loss. + Defaults to ``0.01``. + critic_coef (scalar, optional): critic loss multiplier when computing the total + loss. Defaults to ``1.0``. + loss_critic_type (str, optional): loss function for the value discrepancy. + Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``. + normalize_advantage (bool, optional): if ``True``, the advantage will be normalized + before being used. Defaults to ``False``. + + + .. note: + The advantage (typically GAE) can be computed by the loss function or + in the training loop. The latter option is usually preferred, but this is + up to the user to choose which option is to be preferred. + If the advantage key (``"advantage`` by default) is not present in the + input tensordict, the advantage will be computed by the :meth:`~.forward` + method. + + >>> ppo_loss = KLPENPPOLoss(actor, critic) + >>> advantage = GAE(critic) + >>> data = next(datacollector) + >>> losses = ppo_loss(data) + >>> # equivalent + >>> advantage(data) + >>> losses = ppo_loss(data) + + A custom advantage module can be built using :meth:`~.make_value_function`. + The default is :class:`torchrl.objectives.value.GAE` with hyperparameters + dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
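# Plain-Python sketch of the beta adaptation rule described by the ``dtarg``,
# ``increment`` and ``decrement`` arguments above (a schematic of the
# documented behaviour, not the class's exact code).
beta, dtarg, increment, decrement = 1.0, 0.01, 2.0, 0.5
kl = 0.05  # hypothetical measured KL divergence between old and new policy
if kl > dtarg:
    beta *= increment  # penalise the policy more when it moved too far
elif kl < dtarg:
    beta *= decrement  # relax the penalty when the policy barely moved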
+ + >>> ppo_loss = KLPENPPOLoss(actor, critic) + >>> ppo_loss.make_value_function(ValueFunctions.TDLambda) + >>> data = next(datacollector) + >>> losses = ppo_loss(data) """ @@ -307,6 +444,7 @@ def __init__( self, actor: ProbabilisticTensorDictSequential, critic: TensorDictModule, + *, advantage_key="advantage", dtarg: float = 0.01, beta: float = 1.0, @@ -317,7 +455,6 @@ def __init__( samples_mc_entropy: int = 1, entropy_coef: float = 0.01, critic_coef: float = 1.0, - gamma: float = 0.99, loss_critic_type: str = "smooth_l1", normalize_advantage: bool = True, **kwargs, @@ -325,12 +462,11 @@ def __init__( super(KLPENPPOLoss, self).__init__( actor, critic, - advantage_key, + advantage_key=advantage_key, entropy_bonus=entropy_bonus, samples_mc_entropy=samples_mc_entropy, entropy_coef=entropy_coef, critic_coef=critic_coef, - gamma=gamma, loss_critic_type=loss_critic_type, normalize_advantage=normalize_advantage, **kwargs, @@ -354,7 +490,14 @@ def __init__( def forward(self, tensordict: TensorDictBase) -> TensorDict: tensordict = tensordict.clone(False) - advantage = tensordict.get(self.advantage_key) + advantage = tensordict.get(self.advantage_key, None) + if advantage is None: + self.value_function( + tensordict, + params=self.critic_params, + target_params=self.target_critic_params, + ) + advantage = tensordict.get(self.advantage_key) if self.normalize_advantage and advantage.numel() > 1: loc = advantage.mean().item() scale = advantage.std().clamp_min(1e-6).item() diff --git a/torchrl/objectives/redq.py b/torchrl/objectives/redq.py index fd039b555c4..a48e0d78580 100644 --- a/torchrl/objectives/redq.py +++ b/torchrl/objectives/redq.py @@ -16,10 +16,8 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import ( - distance_loss, - next_state_value as get_next_state_value, -) +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -40,40 +38,49 @@ class REDQLoss(LossModule): Args: actor_network (TensorDictModule): the actor to be trained - qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. - num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. - sub_sample_len (int, optional): number of Q-value networks to be subsampled to evaluate the next state value - Default is 2. - gamma (Number, optional): gamma decay factor. Default is 0.99. - priotity_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is - `"td_error"`. - loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", - "l1", Default is "smooth_l1". + qvalue_network (TensorDictModule): a single Q-value network that will + be multiplicated as many times as needed. + num_qvalue_nets (int, optional): Number of Q-value networks to be trained. + Default is ``10``. + sub_sample_len (int, optional): number of Q-value networks to be + subsampled to evaluate the next state value + Default is ``2``. + priority_key (str, optional): Key where to write the priority value + for prioritized replay buffers. Default is + ``"td_error"``. + loss_function (str, optional): loss function to be used for the Q-value. + Can be one of ``"smooth_l1"``, ``"l2"``, + ``"l1"``, Default is ``"smooth_l1"``. 
alpha_init (float, optional): initial entropy multiplier. - Default is 1.0. + Default is ``1.0``. min_alpha (float, optional): min value of alpha. - Default is 0.1. + Default is ``0.1``. max_alpha (float, optional): max value of alpha. - Default is 10.0. - fixed_alpha (bool, optional): whether alpha should be trained to match a target entropy. Default is :obj:`False`. - target_entropy (Union[str, Number], optional): Target entropy for the stochastic policy. Default is "auto". - delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used - for data collection. Default is :obj:`False`. - gSDE (bool, optional): Knowing if gSDE is used is necessary to create random noise variables. - Default is False + Default is ``10.0``. + fixed_alpha (bool, optional): whether alpha should be trained to match + a target entropy. Default is ``False``. + target_entropy (Union[str, Number], optional): Target entropy for the + stochastic policy. Default is "auto". + delay_qvalue (bool, optional): Whether to separate the target Q value + networks from the Q value networks used + for data collection. Default is ``False``. + gSDE (bool, optional): Knowing if gSDE is used is necessary to create + random noise variables. + Default is ``False``. """ delay_actor: bool = False + default_value_type = ValueFunctions.TD0 def __init__( self, actor_network: TensorDictModule, qvalue_network: TensorDictModule, + *, num_qvalue_nets: int = 10, sub_sample_len: int = 2, - gamma: Number = 0.99, - priotity_key: str = "td_error", + priority_key: str = "td_error", loss_function: str = "smooth_l1", alpha_init: float = 1.0, min_alpha: float = 0.1, @@ -107,8 +114,7 @@ def __init__( ) self.num_qvalue_nets = num_qvalue_nets self.sub_sample_len = max(1, min(sub_sample_len, num_qvalue_nets - 1)) - self.register_buffer("gamma", torch.tensor(gamma)) - self.priority_key = priotity_key + self.priority_key = priority_key self.loss_function = loss_function try: @@ -156,7 +162,7 @@ def alpha(self): def forward(self, tensordict: TensorDictBase) -> TensorDictBase: obs_keys = self.actor_network.in_keys - tensordict_select = tensordict.select("next", *obs_keys, "action") + tensordict_select = tensordict.clone(False).select("next", *obs_keys, "action") selected_models_idx = torch.randperm(self.num_qvalue_nets)[ : self.sub_sample_len ].sort()[0] @@ -259,11 +265,9 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: ) next_state_value = next_state_value.min(0)[0] - target_value = get_next_state_value( - tensordict, - gamma=self.gamma, - pred_next_val=next_state_value, - ) + tensordict_select.set(("next", "state_value"), next_state_value.unsqueeze(-1)) + target_value = self.value_function.value_estimate(tensordict_select).squeeze(-1) + pred_val = state_action_value_qvalue td_error = (pred_val - target_value).pow(2) loss_qval = distance_loss( @@ -308,3 +312,25 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: # placeholder alpha_loss = torch.zeros_like(log_pi) return alpha_loss + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_value" + # we do not need a value network bc the next state value is already passed + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=None, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=None, value_key=value_key, **hp + ) + elif 
value_type == ValueFunctions.GAE: + self._value_function = GAE(value_network=None, value_key=value_key, **hp) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=None, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/reinforce.py b/torchrl/objectives/reinforce.py index c5b265d4e77..1079c4eb2be 100644 --- a/torchrl/objectives/reinforce.py +++ b/torchrl/objectives/reinforce.py @@ -10,7 +10,8 @@ from tensordict.nn import ProbabilisticTensorDictSequential, TensorDictModule from tensordict.tensordict import TensorDict, TensorDictBase from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import distance_loss +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate class ReinforceLoss(LossModule): @@ -19,14 +20,55 @@ class ReinforceLoss(LossModule): Presented in "Simple statistical gradient-following algorithms for connectionist reinforcement learning", Williams, 1992 https://doi.org/10.1007/BF00992696 + + Args: + actor (ProbabilisticTensorDictSequential): policy operator. + critic (ValueOperator): value operator. + delay_value (bool, optional): if ``True``, a target network is needed + for the critic. Defaults to ``False``. + advantage_key (str): the input tensordict key where the advantage is + expected to be written. + Defaults to ``"advantage"``. + value_target_key (str): the input tensordict key where the target state + value is expected to be written. Defaults to ``"value_target"``. + loss_critic_type (str): loss function for the value discrepancy. + Can be one of "l1", "l2" or "smooth_l1". Defaults to ``"smooth_l1"``. + + .. note: + The advantage (typically GAE) can be computed by the loss function or + in the training loop. The latter option is usually preferred, but this is + up to the user to choose which option is to be preferred. + If the advantage key (``"advantage`` by default) is not present in the + input tensordict, the advantage will be computed by the :meth:`~.forward` + method. + + >>> reinforce_loss = ReinforceLoss(actor, critic) + >>> advantage = GAE(critic) + >>> data = next(datacollector) + >>> losses = reinforce_loss(data) + >>> # equivalent + >>> advantage(data) + >>> losses = reinforce_loss(data) + + A custom advantage module can be built using :meth:`~.make_value_function`. + The default is :class:`torchrl.objectives.value.GAE` with hyperparameters + dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
+ + >>> reinforce_loss = ReinforceLoss(actor, critic) + >>> reinforce_loss.make_value_function(ValueFunctions.TDLambda) + >>> data = next(datacollector) + >>> losses = reinforce_loss(data) + """ + default_value_type = ValueFunctions.GAE + def __init__( self, - actor_network: ProbabilisticTensorDictSequential, + actor: ProbabilisticTensorDictSequential, critic: Optional[TensorDictModule] = None, + *, delay_value: bool = False, - gamma: float = 0.99, advantage_key: str = "advantage", value_target_key: str = "value_target", loss_critic_type: str = "smooth_l1", @@ -37,11 +79,10 @@ def __init__( self.advantage_key = advantage_key self.value_target_key = value_target_key self.loss_critic_type = loss_critic_type - self.register_buffer("gamma", torch.tensor(gamma)) # Actor self.convert_to_functional( - actor_network, + actor, "actor_network", create_target_params=False, ) @@ -52,11 +93,18 @@ def __init__( critic, "critic", create_target_params=self.delay_value, - compare_against=list(actor_network.parameters()), + compare_against=list(actor.parameters()), ) def forward(self, tensordict: TensorDictBase) -> TensorDictBase: - advantage = tensordict.get(self.advantage_key) + advantage = tensordict.get(self.advantage_key, None) + if advantage is None: + self.value_function( + tensordict, + params=self.critic_params, + target_params=self.target_critic_params, + ) + advantage = tensordict.get(self.advantage_key) # compute log-prob tensordict = self.actor_network( @@ -95,3 +143,26 @@ def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: f"can be used for the value loss." ) return loss_value + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_value" + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.GAE: + self._value_function = GAE( + value_network=self.critic, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=self.critic, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 4045dcfc119..5a8bdb25aef 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -9,18 +9,13 @@ import numpy as np import torch -from tensordict.nn import make_functional, TensorDictModule, TensorDictSequential +from tensordict.nn import make_functional, TensorDictModule from tensordict.tensordict import TensorDict, TensorDictBase from torch import Tensor from torchrl.modules import ProbabilisticActor from torchrl.modules.tensordict_module.actors import ActorCriticWrapper -from torchrl.objectives.utils import ( - default_value_kwargs, - distance_loss, - next_state_value, - ValueFunctions, -) +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions from ..envs.utils import set_exploration_mode, step_mdp from .common import LossModule @@ -36,53 +31,6 @@ FUNCTORCH_ERROR = err -class _SACValueNet(TensorDictSequential): - r"""Value network for SAC v2. - - SAC v2 is based on a value estimate of the form: - - .. 
math:: - - V = Q(s,a) - \alpha * \log p(a | s) - - This class computes this value given the actor and qvalue network - - """ - - def __init__(self, actor_network, qvalue_network): - super().__init__(actor_network, qvalue_network) - # we highjack the forward so the out_keys must be re-written - self.out_keys = ["state_value"] - - def forward(self, tensordict, _alpha, actor_params, qval_params): - """Computes the value as `val = qval - a * log_prob(a)`.""" - actor_network, qvalue_network = self - - obs_keys = actor_network.in_keys - data = tensordict.select(*obs_keys) - # get actions and log-probs - with torch.no_grad(): - with set_exploration_mode("random"): - dist = actor_network.get_dist(data, params=actor_params) - data.set("action", dist.rsample()) - log_prob = dist.log_prob(data.get("action")) - data.set("sample_log_prob", log_prob) - sample_log_prob = data.get("sample_log_prob") - - # get q-values - data = vmap(qvalue_network, (None, 0))(data, qval_params) - state_action_value = data.get("state_action_value") - if ( - state_action_value.shape[-len(sample_log_prob.shape) :] - != sample_log_prob.shape - ): - sample_log_prob = sample_log_prob.unsqueeze(-1) - state_value = state_action_value - _alpha * sample_log_prob - state_value = state_value.min(0)[0] - tensordict.set("state_value", state_value) - return tensordict - - class SACLoss(LossModule): """TorchRL implementation of the SAC loss. @@ -99,6 +47,8 @@ class SACLoss(LossModule): .. note:: If not provided, the second version of SAC is assumed, where only the Q-Value network is needed. + num_qvalue_nets (integer, optional): number of Q-Value networks used. + Defaults to ``2``. priority_key (str, optional): tensordict key where to write the priority (for prioritized replay buffer usage). Defaults to ``"td_error"``. @@ -110,29 +60,32 @@ class SACLoss(LossModule): Default is 0.1. max_alpha (float, optional): max value of alpha. Default is 10.0. - fixed_alpha (bool, optional): if True, alpha will be fixed to its + fixed_alpha (bool, optional): if ``True``, alpha will be fixed to its initial value. Otherwise, alpha will be optimized to match the 'target_entropy' value. - Default is :obj:`False`. + Default is ``False``. target_entropy (float or str, optional): Target entropy for the stochastic policy. Default is "auto", where target entropy is computed as :obj:`-prod(n_actions)`. delay_actor (bool, optional): Whether to separate the target actor networks from the actor networks used for data collection. - Default is :obj:`False`. + Default is ``False``. delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used for data collection. - Default is :obj:`False`. + Default is ``False``. delay_value (bool, optional): Whether to separate the target value networks from the value networks used for data collection. - Default is :obj:`False`. + Default is ``False``. 
""" + default_value_type = ValueFunctions.TD0 + def __init__( self, actor_network: ProbabilisticActor, qvalue_network: TensorDictModule, value_network: Optional[TensorDictModule] = None, + *, num_qvalue_nets: int = 2, priority_key: str = "td_error", loss_function: str = "smooth_l1", @@ -231,7 +184,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): if self._version == 1: value_net = self.actor_critic elif self._version == 2: - value_net = _SACValueNet(self.actor_network, self.qvalue_network) + # we will take care of computing the next value inside this module + value_net = None else: # unreachable raise NotImplementedError @@ -254,11 +208,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key=value_key, ) elif value_type is ValueFunctions.GAE: - self._value_function = GAE( - **hp, - value_network=value_net, - value_target_key="value_target", - value_key=value_key, + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type is ValueFunctions.TDLambda: self._value_function = TDLambdaEstimate( @@ -270,10 +221,6 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): else: raise NotImplementedError(f"Unknown value type {value_type}") - def _default_value_function(self): - # TD0 by default, as in paper - self.make_value_function(ValueFunctions.TD0) - @property def device(self) -> torch.device: for p in self.parameters(): @@ -393,14 +340,57 @@ def _loss_qvalue_v1(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: return loss_value, priority_value + def _get_value_v2(self, tensordict, _alpha, actor_params, qval_params): + r"""Value network for SAC v2. + + SAC v2 is based on a value estimate of the form: + + .. math:: + + V = Q(s,a) - \alpha * \log p(a | s) + + This class computes this value given the actor and qvalue network + + """ + tensordict = tensordict.clone(False) + # get actions and log-probs + with torch.no_grad(): + with set_exploration_mode("random"): + dist = self.actor_network.get_dist(tensordict, params=actor_params) + tensordict.set("action", dist.rsample()) + log_prob = dist.log_prob(tensordict.get("action")) + tensordict.set("sample_log_prob", log_prob) + sample_log_prob = tensordict.get("sample_log_prob") + + # get q-values + tensordict_expand = vmap(self.qvalue_network, (None, 0))( + tensordict, qval_params + ) + state_action_value = tensordict_expand.get("state_action_value") + if ( + state_action_value.shape[-len(sample_log_prob.shape) :] + != sample_log_prob.shape + ): + sample_log_prob = sample_log_prob.unsqueeze(-1) + state_value = state_action_value - _alpha * sample_log_prob + state_value = state_value.min(0)[0] + tensordict.set(("next", self.value_function.value_key), state_value) + target_value = self.value_function.value_estimate( + tensordict, + _alpha=self._alpha, + actor_params=self.target_actor_network_params, + qval_params=self.target_qvalue_network_params, + ).squeeze(-1) + return target_value + def _loss_qvalue_v2(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: # we pass the alpha value to the tensordict. Since it's a scalar, we must erase the batch-size first. 
- target_value = self.value_function.value_estimate( + target_value = self._get_value_v2( tensordict, - _alpha=self._alpha, - actor_params=self.target_actor_network_params, - qval_params=self.target_qvalue_network_params, - ).squeeze(-1) + self._alpha, + self.target_actor_network_params, + self.target_qvalue_network_params, + ) tensordict_expand = vmap(self.qvalue_network, (None, 0))( tensordict.select(*self.qvalue_network.in_keys), @@ -476,9 +466,9 @@ class DiscreteSACLoss(LossModule): Args: actor_network (ProbabilisticActor): the actor to be trained qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. + num_actions (int): number of actions in the action space. num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. - gamma (Number, optional): gamma decay factor. Default is 0.99. - priotity_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is + priority_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is `"td_error"`. loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", "l1", Default is "smooth_l1". @@ -488,13 +478,15 @@ class DiscreteSACLoss(LossModule): Default is 0.1. max_alpha (float, optional): max value of alpha. Default is 10.0. - fixed_alpha (bool, optional): whether alpha should be trained to match a target entropy. Default is :obj:`False`. + fixed_alpha (bool, optional): whether alpha should be trained to match a target entropy. Default is ``False``. target_entropy_weight (float, optional): weight for the target entropy term. target_entropy (Union[str, Number], optional): Target entropy for the stochastic policy. Default is "auto". delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used - for data collection. Default is :obj:`False`. + for data collection. Default is ``False``. 
+ """ + default_value_type = ValueFunctions.TD0 delay_actor: bool = False def __init__( @@ -502,9 +494,9 @@ def __init__( actor_network: ProbabilisticActor, qvalue_network: TensorDictModule, num_actions: int, + *, num_qvalue_nets: int = 2, - gamma: Number = 0.99, - priotity_key: str = "td_error", + priority_key: str = "td_error", loss_function: str = "smooth_l1", alpha_init: float = 1.0, min_alpha: float = 0.1, @@ -533,8 +525,7 @@ def __init__( compare_against=list(actor_network.parameters()), ) self.num_qvalue_nets = num_qvalue_nets - self.register_buffer("gamma", torch.tensor(gamma)) - self.priority_key = priotity_key + self.priority_key = priority_key self.loss_function = loss_function try: @@ -575,7 +566,7 @@ def alpha(self): def forward(self, tensordict: TensorDictBase) -> TensorDictBase: obs_keys = self.actor_network.in_keys - tensordict_select = tensordict.select("next", *obs_keys, "action") + tensordict_select = tensordict.clone(False).select("next", *obs_keys, "action") actor_params = torch.stack( [self.actor_network_params, self.target_actor_network_params], 0 @@ -668,11 +659,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: * (next_state_action_value_qvalue.min(0)[0] - self.alpha * logp_pi[1]) ).sum(dim=-1, keepdim=True) - target_value = next_state_value( - tensordict, - gamma=self.gamma, - pred_next_val=pred_next_val, - ) + tensordict_select.set(("next", self.value_function.value_key), pred_next_val) + target_value = self.value_function.value_estimate(tensordict_select).squeeze(-1) actions = torch.argmax(tensordict_select["action"], dim=-1) @@ -731,3 +719,36 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: # placeholder alpha_loss = torch.zeros_like(log_pi) return alpha_loss + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + value_net = None + value_key = "state_value" + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + if value_type is ValueFunctions.TD1: + self._value_function = TD1Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.TD0: + self._value_function = TD0Estimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + elif value_type is ValueFunctions.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type is ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + **hp, + value_network=value_net, + value_target_key="value_target", + value_key=value_key, + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/td3.py b/torchrl/objectives/td3.py index 562e2f711ba..61dcdbbfcb1 100644 --- a/torchrl/objectives/td3.py +++ b/torchrl/objectives/td3.py @@ -3,7 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-from numbers import Number import torch from tensordict.nn import TensorDictModule @@ -12,10 +11,8 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import ( - distance_loss, - next_state_value as get_next_state_value, -) +from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -34,29 +31,29 @@ class TD3Loss(LossModule): actor_network (TensorDictModule): the actor to be trained qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. - gamma (Number, optional): gamma decay factor. Default is 0.99. - max_action (float, optional): Maximum action, in MuJoCo environments typically 1.0. policy_noise (float, optional): Standard deviation for the target policy action noise. Default is 0.2. noise_clip (float, optional): Clipping range value for the sampled target policy action noise. Default is 0.5. - priotity_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is + priority_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is `"td_error"`. loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", "l1", Default is "smooth_l1". delay_actor (bool, optional): whether to separate the target actor networks from the actor networks used for - data collection. Default is :obj:`False`. + data collection. Default is ``False``. delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used - for data collection. Default is :obj:`False`. + for data collection. Default is ``False``. 
""" + default_value_type = ValueFunctions.TD0 + def __init__( self, actor_network: TensorDictModule, qvalue_network: TensorDictModule, + *, num_qvalue_nets: int = 2, - gamma: Number = 0.99, policy_noise: float = 0.2, noise_clip: float = 0.5, - priotity_key: str = "td_error", + priority_key: str = "td_error", loss_function: str = "smooth_l1", delay_actor: bool = False, delay_qvalue: bool = False, @@ -86,8 +83,7 @@ def __init__( ) self.num_qvalue_nets = num_qvalue_nets - self.register_buffer("gamma", torch.tensor(gamma)) - self.priority_key = priotity_key + self.priority_key = priority_key self.loss_function = loss_function self.policy_noise = policy_noise self.noise_clip = noise_clip @@ -95,16 +91,17 @@ def __init__( def forward(self, tensordict: TensorDictBase) -> TensorDictBase: obs_keys = self.actor_network.in_keys - tensordict_select = tensordict.select("next", *obs_keys, "action") + tensordict_save = tensordict + tensordict = tensordict.clone(False) actor_params = torch.stack( [self.actor_network_params, self.target_actor_network_params], 0 ) - tensordict_actor_grad = tensordict_select.select( + tensordict_actor_grad = tensordict.select( *obs_keys ) # to avoid overwriting keys - next_td_actor = step_mdp(tensordict_select).select( + next_td_actor = step_mdp(tensordict).select( *self.actor_network.in_keys ) # next_observation -> tensordict_actor = torch.stack([tensordict_actor_grad, next_td_actor], 0) @@ -134,9 +131,9 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: .select(*self.qvalue_network.in_keys) .expand(self.num_qvalue_nets, *tensordict_actor[0].batch_size) ) # for actor loss - _qval_td = tensordict_select.select(*self.qvalue_network.in_keys).expand( + _qval_td = tensordict.select(*self.qvalue_network.in_keys).expand( self.num_qvalue_nets, - *tensordict_select.select(*self.qvalue_network.in_keys).batch_size, + *tensordict.select(*self.qvalue_network.in_keys).batch_size, ) # for qvalue loss _next_val_td = ( tensordict_actor[1] @@ -180,12 +177,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: loss_actor = -(state_action_value_actor.min(0)[0]).mean() next_state_value = next_state_action_value_qvalue.min(0)[0] - - target_value = get_next_state_value( - tensordict, - gamma=self.gamma, - pred_next_val=next_state_value, - ) + tensordict.set(("next", "state_action_value"), next_state_value.unsqueeze(-1)) + target_value = self.value_function.value_estimate(tensordict).squeeze(-1) pred_val = state_action_value_qvalue td_error = (pred_val - target_value).pow(2) loss_qval = ( @@ -199,7 +192,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: * 0.5 ) - tensordict.set("td_error", td_error.detach().max(0)[0]) + tensordict_save.set("td_error", td_error.detach().max(0)[0]) if not loss_qval.shape == loss_actor.shape: raise RuntimeError( @@ -218,3 +211,27 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: ) return td_out + + def make_value_function(self, value_type: ValueFunctions, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + hp.update(hyperparams) + value_key = "state_action_value" + # we do not need a value network bc the next state value is already passed + if value_type == ValueFunctions.TD1: + self._value_function = TD1Estimate( + value_network=None, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.TD0: + self._value_function = TD0Estimate( + value_network=None, value_key=value_key, **hp + ) + elif value_type == ValueFunctions.GAE: + raise NotImplementedError( + f"Value type 
{value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueFunctions.TDLambda: + self._value_function = TDLambdaEstimate( + value_network=None, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 17a9c35c90d..6695fbc2488 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -82,6 +82,9 @@ def value_estimate( ): """Gets a value estimate, usually used as a target value for the value network. + If the state value key is present under ``tensordict.get(("next", self.value_key))`` + then this value will be used without recurring to the value network. + Args: tensordict (TensorDictBase): the tensordict containing the data to read. @@ -89,6 +92,8 @@ def value_estimate( target params to be passed to the functional value network module. **kwargs: the keyword arguments to be passed to the value network. + Returns: a tensor corresponding to the state value. + """ raise NotImplementedError @@ -99,17 +104,24 @@ def is_functional(self): else: raise RuntimeError("Cannot determine if value network is functional.") + @property + def is_stateless(self): + if not self.is_functional: + return False + return self.value_network._is_stateless + class TD0Estimate(ValueFunctionBase): """Myopic Temporal Difference (TD(0)) estimate of advantage function. Args: gamma (scalar): exponential mean discount. - value_network (TensorDictModule): value operator used to retrieve the value estimates. - average_rewards (bool, optional): if True, rewards will be standardized + value_network (TensorDictModule): value operator used to retrieve + the value estimates. + average_rewards (bool, optional): if ``True``, rewards will be standardized before the TD is computed. - differentiable (bool, optional): if True, gradients are propagated throught - the computation of the value function. Default is :obj:`False`. + differentiable (bool, optional): if ``True``, gradients are propagated throught + the computation of the value function. Default is ``False``. advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. 
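The ``value_estimate`` docstring added above changes how the refactored losses build their targets: the REDQ, TD3 and discrete SAC hunks earlier in this patch no longer call ``next_state_value`` but write their own bootstrap value under ``("next", value_key)`` and let the estimator assemble the target. The fragment below is a minimal sketch of that pattern; it mirrors the ``make_value_function`` calls in the patch, while the batch size and the random reward/done/value tensors are illustrative assumptions.

>>> import torch
>>> from tensordict.tensordict import TensorDict
>>> from torchrl.objectives.utils import default_value_kwargs, ValueFunctions
>>> from torchrl.objectives.value import TD0Estimate
>>> # no value network: the next-state value is supplied by the loss itself
>>> hp = dict(default_value_kwargs(ValueFunctions.TD0))
>>> value_est = TD0Estimate(value_network=None, value_key="state_value", **hp)
>>> data = TensorDict(
...     {
...         "next": TensorDict(
...             {
...                 "reward": torch.randn(4, 1),
...                 "done": torch.zeros(4, 1, dtype=torch.bool),
...                 # e.g. the minimum over an ensemble of Q-value networks
...                 "state_value": torch.randn(4, 1),
...             },
...             [4],
...         )
...     },
...     [4],
... )
>>> # the pre-filled ("next", "state_value") entry is used directly,
>>> # so the (absent) value network is never queried
>>> target = value_est.value_estimate(data)  # expected shape: [4, 1]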
@@ -121,6 +133,7 @@ class TD0Estimate(ValueFunctionBase): def __init__( self, + *, gamma: Union[float, torch.Tensor], value_network: TensorDictModule, average_rewards: bool = False, @@ -132,7 +145,7 @@ def __init__( super().__init__() try: device = next(value_network.parameters()).device - except StopIteration: + except (AttributeError, StopIteration): device = torch.device("cpu") self.register_buffer("gamma", torch.tensor(gamma, device=device)) self.value_network = value_network @@ -151,11 +164,16 @@ def __init__( self.advantage_key = advantage_key self.value_target_key = value_target_key - self.in_keys = ( - value_network.in_keys - + [("next", "reward"), ("next", "done")] - + [("next", in_key) for in_key in value_network.in_keys] - ) + try: + self.in_keys = ( + value_network.in_keys + + [("next", "reward"), ("next", "done")] + + [("next", in_key) for in_key in value_network.in_keys] + ) + except AttributeError: + # value network does not have an `in_keys` attribute + pass + self.out_keys = [self.advantage_key, self.value_target_key] @_self_set_grad_enabled @@ -226,7 +244,7 @@ def forward( ) kwargs = {} - if self.is_functional and params is None: + if self.is_stateless and params is None: raise RuntimeError( "Expected params to be passed to advantage module but got none." ) @@ -249,8 +267,9 @@ def value_estimate( target_params: Optional[TensorDictBase] = None, **kwargs, ): - gamma = self.gamma reward = tensordict.get(("next", "reward")) + device = reward.device + gamma = self.gamma.to(device) steps_to_next_obs = tensordict.get("steps_to_next_obs", None) if steps_to_next_obs is not None: gamma = gamma ** steps_to_next_obs.view_as(reward) @@ -262,12 +281,12 @@ def value_estimate( ("next", "reward"), reward ) # we must update the rewards if they are used later in the code step_td = step_mdp(tensordict) - if target_params is not None: - # we assume that target parameters are not differentiable - kwargs["params"] = target_params - with hold_out_net(self.value_network): - self.value_network(step_td, **kwargs) - next_value = step_td.get(self.value_key) + if self.value_key not in step_td.keys(): + if target_params is not None: + kwargs["params"] = target_params + with hold_out_net(self.value_network): + self.value_network(step_td, **kwargs) + next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) value_target = reward + gamma * (1 - done.to(reward.dtype)) * next_value @@ -280,10 +299,10 @@ class TD1Estimate(ValueFunctionBase): Args: gamma (scalar): exponential mean discount. value_network (TensorDictModule): value operator used to retrieve the value estimates. - average_rewards (bool, optional): if True, rewards will be standardized + average_rewards (bool, optional): if ``True``, rewards will be standardized before the TD is computed. - differentiable (bool, optional): if True, gradients are propagated throught - the computation of the value function. Default is :obj:`False`. + differentiable (bool, optional): if ``True``, gradients are propagated throught + the computation of the value function. Default is ``False``. advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. 
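With the ``*`` added to the signatures above, the estimator constructors now only accept keyword arguments, and the broadened ``except (AttributeError, StopIteration)`` clauses let them be built without a value network at all. A short construction sketch follows; the ``observation``/``state_value`` keys and the linear layer are illustrative assumptions, not part of the patch.

>>> from tensordict.nn import TensorDictModule
>>> from torch import nn
>>> from torchrl.objectives.value import TD1Estimate
>>> value_net = TensorDictModule(
...     nn.Linear(3, 1), in_keys=["observation"], out_keys=["state_value"]
... )
>>> # arguments are keyword-only: TD1Estimate(0.99, value_net) now raises a TypeError
>>> td1 = TD1Estimate(gamma=0.99, value_network=value_net, differentiable=False)
>>> # the value network can also be omitted when the next value is pre-computed,
>>> # thanks to the AttributeError handling introduced above
>>> td1_no_net = TD1Estimate(gamma=0.99, value_network=None)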
@@ -295,6 +314,7 @@ class TD1Estimate(ValueFunctionBase): def __init__( self, + *, gamma: Union[float, torch.Tensor], value_network: TensorDictModule, average_rewards: bool = False, @@ -306,7 +326,7 @@ def __init__( super().__init__() try: device = next(value_network.parameters()).device - except StopIteration: + except (AttributeError, StopIteration): device = torch.device("cpu") self.register_buffer("gamma", torch.tensor(gamma, device=device)) self.value_network = value_network @@ -325,11 +345,15 @@ def __init__( self.advantage_key = advantage_key self.value_target_key = value_target_key - self.in_keys = ( - value_network.in_keys - + [("next", "reward"), ("next", "done")] - + [("next", in_key) for in_key in value_network.in_keys] - ) + try: + self.in_keys = ( + value_network.in_keys + + [("next", "reward"), ("next", "done")] + + [("next", in_key) for in_key in value_network.in_keys] + ) + except AttributeError: + # value network does not have an `in_keys` attribute + pass self.out_keys = [self.advantage_key, self.value_target_key] @_self_set_grad_enabled @@ -400,7 +424,7 @@ def forward( ) kwargs = {} - if self.is_functional and params is None: + if self.is_stateless and params is None: raise RuntimeError( "Expected params to be passed to advantage module but got none." ) @@ -423,8 +447,9 @@ def value_estimate( target_params: Optional[TensorDictBase] = None, **kwargs, ): - gamma = self.gamma reward = tensordict.get(("next", "reward")) + device = reward.device + gamma = self.gamma.to(device) steps_to_next_obs = tensordict.get("steps_to_next_obs", None) if steps_to_next_obs is not None: gamma = gamma ** steps_to_next_obs.view_as(reward) @@ -436,12 +461,12 @@ def value_estimate( ("next", "reward"), reward ) # we must update the rewards if they are used later in the code step_td = step_mdp(tensordict) - if target_params is not None: - # we assume that target parameters are not differentiable - kwargs["params"] = target_params - with hold_out_net(self.value_network): - self.value_network(step_td, **kwargs) - next_value = step_td.get(self.value_key) + if self.value_key not in step_td.keys(): + if target_params is not None: + kwargs["params"] = target_params + with hold_out_net(self.value_network): + self.value_network(step_td, **kwargs) + next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) value_target = td_advantage_estimate( @@ -457,10 +482,10 @@ class TDLambdaEstimate(ValueFunctionBase): gamma (scalar): exponential mean discount. lmbda (scalar): trajectory discount. value_network (TensorDictModule): value operator used to retrieve the value estimates. - average_rewards (bool, optional): if True, rewards will be standardized + average_rewards (bool, optional): if ``True``, rewards will be standardized before the TD is computed. - differentiable (bool, optional): if True, gradients are propagated throught - the computation of the value function. Default is :obj:`False`. + differentiable (bool, optional): if ``True``, gradients are propagated throught + the computation of the value function. Default is ``False``. vectorized (bool, optional): whether to use the vectorized version of the lambda return. Default is `True`. advantage_key (str or tuple of str, optional): the key of the advantage entry. 
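The ``forward`` methods above now only require ``params`` when the value network is stateless; a regular stateful module can be called directly, which is what the updated PPO and A2C tests later in this patch series do with ``advantage(td)``. Below is a rough sketch of computing a TD(lambda) advantage over a short trajectory; the two-by-ten batch shape, the ``observation`` key and the linear value network are assumptions made for illustration.

>>> import torch
>>> from tensordict.nn import TensorDictModule
>>> from tensordict.tensordict import TensorDict
>>> from torch import nn
>>> from torchrl.objectives.value import TDLambdaEstimate
>>> value_net = TensorDictModule(
...     nn.Linear(3, 1), in_keys=["observation"], out_keys=["state_value"]
... )
>>> tdlambda = TDLambdaEstimate(gamma=0.99, lmbda=0.95, value_network=value_net)
>>> data = TensorDict(
...     {
...         "observation": torch.randn(2, 10, 3),
...         "next": TensorDict(
...             {
...                 "observation": torch.randn(2, 10, 3),
...                 "reward": torch.randn(2, 10, 1),
...                 "done": torch.zeros(2, 10, 1, dtype=torch.bool),
...             },
...             [2, 10],
...         ),
...     },
...     [2, 10],
... )
>>> # no params needed here: the value network is a plain stateful module
>>> data = tdlambda(data)  # writes "advantage" and "value_target" in place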
@@ -474,6 +499,7 @@ class TDLambdaEstimate(ValueFunctionBase): def __init__( self, + *, gamma: Union[float, torch.Tensor], lmbda: Union[float, torch.Tensor], value_network: TensorDictModule, @@ -487,7 +513,7 @@ def __init__( super().__init__() try: device = next(value_network.parameters()).device - except StopIteration: + except (AttributeError, StopIteration): device = torch.device("cpu") self.register_buffer("gamma", torch.tensor(gamma, device=device)) self.register_buffer("lmbda", torch.tensor(lmbda, device=device)) @@ -508,11 +534,15 @@ def __init__( self.advantage_key = advantage_key self.value_target_key = value_target_key - self.in_keys = ( - value_network.in_keys - + [("next", "reward"), ("next", "done")] - + [("next", in_key) for in_key in value_network.in_keys] - ) + try: + self.in_keys = ( + value_network.in_keys + + [("next", "reward"), ("next", "done")] + + [("next", in_key) for in_key in value_network.in_keys] + ) + except AttributeError: + # value network does not have an `in_keys` attribute + pass self.out_keys = [self.advantage_key, self.value_target_key] @_self_set_grad_enabled @@ -584,7 +614,7 @@ def forward( f"tensordict.batch_size = {tensordict.batch_size}" ) kwargs = {} - if self.is_functional and params is None: + if self.is_stateless and params is None: raise RuntimeError( "Expected params to be passed to advantage module but got none." ) @@ -607,9 +637,9 @@ def value_estimate( target_params: Optional[TensorDictBase] = None, **kwargs, ): - - gamma = self.gamma reward = tensordict.get(("next", "reward")) + device = reward.device + gamma = self.gamma.to(device) steps_to_next_obs = tensordict.get("steps_to_next_obs", None) if steps_to_next_obs is not None: gamma = gamma ** steps_to_next_obs.view_as(reward) @@ -623,14 +653,12 @@ def value_estimate( ) # we must update the rewards if they are used later in the code step_td = step_mdp(tensordict) - if target_params is not None: - # we assume that target parameters are not differentiable - kwargs["params"] = target_params - with hold_out_net(self.value_network): - # we may still need to pass gradient, but we don't want to assign grads to - # value net params - self.value_network(step_td, **kwargs) - next_value = step_td.get(self.value_key) + if self.value_key not in step_td.keys(): + if target_params is not None: + kwargs["params"] = target_params + with hold_out_net(self.value_network): + self.value_network(step_td, **kwargs) + next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) if self.vectorized: @@ -654,10 +682,10 @@ class GAE(ValueFunctionBase): gamma (scalar): exponential mean discount. lmbda (scalar): trajectory discount. value_network (TensorDictModule): value operator used to retrieve the value estimates. - average_gae (bool): if True, the resulting GAE values will be standardized. - Default is :obj:`False`. - differentiable (bool, optional): if True, gradients are propagated throught - the computation of the value function. Default is :obj:`False`. + average_gae (bool): if ``True``, the resulting GAE values will be standardized. + Default is ``False``. + differentiable (bool, optional): if ``True``, gradients are propagated throught + the computation of the value function. Default is ``False``. advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. 
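All of the ``make_value_function`` overrides added in this patch follow the same recipe: merge ``default_value_kwargs(value_type)`` with any user-supplied hyperparameters and instantiate the matching estimator, while losses whose bootstrap value comes from Q-networks (SAC, discrete SAC and TD3 above) reject ``ValueFunctions.GAE`` with a ``NotImplementedError``. A hedged usage fragment follows; ``loss_fn`` stands for one of those losses (for instance a ``TD3Loss`` built elsewhere) and the hyperparameter values are arbitrary.

>>> from torchrl.objectives.utils import ValueFunctions
>>> # keyword hyperparameters override the defaults from default_value_kwargs
>>> loss_fn.make_value_function(ValueFunctions.TDLambda, gamma=0.9, lmbda=0.95)
>>> # GAE needs a state-value network that these losses do not carry,
>>> # so the call below raises NotImplementedError
>>> loss_fn.make_value_function(ValueFunctions.GAE)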
@@ -676,6 +704,7 @@ class GAE(ValueFunctionBase): def __init__( self, + *, gamma: Union[float, torch.Tensor], lmbda: float, value_network: TensorDictModule, @@ -688,7 +717,7 @@ def __init__( super().__init__() try: device = next(value_network.parameters()).device - except StopIteration: + except (AttributeError, StopIteration): device = torch.device("cpu") self.register_buffer("gamma", torch.tensor(gamma, device=device)) self.register_buffer("lmbda", torch.tensor(lmbda, device=device)) @@ -708,11 +737,16 @@ def __init__( self.advantage_key = advantage_key self.value_target_key = value_target_key - self.in_keys = ( - value_network.in_keys - + [("next", "reward"), ("next", "done")] - + [("next", in_key) for in_key in value_network.in_keys] - ) + try: + self.in_keys = ( + value_network.in_keys + + [("next", "reward"), ("next", "done")] + + [("next", in_key) for in_key in value_network.in_keys] + ) + except AttributeError: + # value network does not have an `in_keys` attribute + pass + self.out_keys = [self.advantage_key, self.value_target_key] @_self_set_grad_enabled @@ -785,14 +819,14 @@ def forward( f"tensordict.batch_size = {tensordict.batch_size}" ) reward = tensordict.get(("next", "reward")) - gamma, lmbda = self.gamma, self.lmbda - reward = tensordict.get(("next", "reward")) + device = reward.device + gamma, lmbda = self.gamma.to(device), self.lmbda.to(device) steps_to_next_obs = tensordict.get("steps_to_next_obs", None) if steps_to_next_obs is not None: gamma = gamma ** steps_to_next_obs.view_as(reward) kwargs = {} - if self.is_functional and params is None: + if self.is_stateless and params is None: raise RuntimeError( "Expected params to be passed to advantage module but got none." ) @@ -845,12 +879,13 @@ def value_estimate( f"tensordict.batch_size = {tensordict.batch_size}" ) reward = tensordict.get(("next", "reward")) - gamma, lmbda = self.gamma, self.lmbda + device = reward.device + gamma, lmbda = self.gamma.to(device), self.lmbda.to(device) steps_to_next_obs = tensordict.get("steps_to_next_obs", None) if steps_to_next_obs is not None: gamma = gamma ** steps_to_next_obs.view_as(reward) - if self.is_functional and params is None: + if self.is_stateless and params is None: raise RuntimeError( "Expected params to be passed to advantage module but got none." ) diff --git a/torchrl/objectives/value/functional.py b/torchrl/objectives/value/functional.py index d4e0a4f0c77..534eb47306d 100644 --- a/torchrl/objectives/value/functional.py +++ b/torchrl/objectives/value/functional.py @@ -45,6 +45,10 @@ def generalized_advantage_estimate( done (Tensor): boolean flag for end of episode. """ + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) for tensor in (next_state_value, state_value, reward, done): if tensor.shape[-1] != 1: raise RuntimeError( @@ -97,6 +101,10 @@ def vec_generalized_advantage_estimate( done (Tensor): boolean flag for end of episode. """ + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) for tensor in (next_state_value, state_value, reward, done): if tensor.shape[-1] != 1: raise RuntimeError( @@ -163,6 +171,10 @@ def td_advantage_estimate( done (Tensor): boolean flag for end of episode. 
""" + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) for tensor in (next_state_value, state_value, reward, done): if tensor.shape[-1] != 1: raise RuntimeError( @@ -191,7 +203,7 @@ def td_lambda_return_estimate( reward (Tensor): reward of taking actions in the environment. must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. - rolling_gamma (bool, optional): if True, it is assumed that each gamma + rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: gamma = [g1, g2, g3, g4] value = [v1, v2, v3, v4] @@ -214,6 +226,10 @@ def td_lambda_return_estimate( Default is True. """ + if not (next_state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) for tensor in (next_state_value, reward, done): if tensor.shape[-1] != 1: raise RuntimeError( @@ -288,7 +304,7 @@ def td_lambda_advantage_estimate( reward (Tensor): reward of taking actions in the environment. must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. - rolling_gamma (bool, optional): if True, it is assumed that each gamma + rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: gamma = [g1, g2, g3, g4] value = [v1, v2, v3, v4] @@ -311,6 +327,10 @@ def td_lambda_advantage_estimate( Default is True. """ + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) if not state_value.shape == next_state_value.shape: raise RuntimeError("shape of state_value and next_state_value must match") returns = td_lambda_return_estimate( @@ -342,7 +362,7 @@ def vec_td_lambda_advantage_estimate( reward (Tensor): reward of taking actions in the environment. must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. - rolling_gamma (bool, optional): if True, it is assumed that each gamma + rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: gamma = [g1, g2, g3, g4] value = [v1, v2, v3, v4] @@ -365,6 +385,10 @@ def vec_td_lambda_advantage_estimate( Default is True. """ + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) return ( vec_td_lambda_return_estimate( gamma, lmbda, next_state_value, reward, done, rolling_gamma @@ -387,7 +411,7 @@ def vec_td_lambda_return_estimate( reward (Tensor): reward of taking actions in the environment. must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. - rolling_gamma (bool, optional): if True, it is assumed that each gamma + rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: gamma = [g1, g2, g3, g4] value = [v1, v2, v3, v4] @@ -410,6 +434,10 @@ def vec_td_lambda_return_estimate( Default is True. 
""" + if not (next_state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) shape = next_state_value.shape if not shape[-1] == 1: raise RuntimeError("last dimension of inputs shape must be singleton") diff --git a/torchrl/objectives/value/utils.py b/torchrl/objectives/value/utils.py index beaeacf4bf8..d5091fcd67f 100644 --- a/torchrl/objectives/value/utils.py +++ b/torchrl/objectives/value/utils.py @@ -145,7 +145,7 @@ def _make_gammas_tensor(gamma: torch.Tensor, T: int, rolling_gamma: bool): Args: gamma (torch.tensor): the gamma tensor to be prepared. T (int): the time length - rolling_gamma (bool): if True, the gamma value is set for each step + rolling_gamma (bool): if ``True``, the gamma value is set for each step independently. If False, the gamma value at (i, t) will be used for the trajectory following (i, t). diff --git a/torchrl/record/recorder.py b/torchrl/record/recorder.py index 529e64927d5..56c68e065ca 100644 --- a/torchrl/record/recorder.py +++ b/torchrl/record/recorder.py @@ -34,7 +34,7 @@ class VideoRecorder(ObservationTransform): skip (int): frame interval in the output video. Default is 2. center_crop (int, optional): value of square center crop. - make_grid (bool, optional): if True, a grid is created assuming that a + make_grid (bool, optional): if ``True``, a grid is created assuming that a tensor of shape [B x W x H x 3] is provided, with B being the batch size. Default is True. @@ -138,7 +138,7 @@ class TensorDictRecorder(Transform): Args: out_file_base (str): a string defining the prefix of the file where the tensordict will be written. - skip_reset (bool): if True, the first TensorDict of the list will be discarded (usually the tensordict + skip_reset (bool): if ``True``, the first TensorDict of the list will be discarded (usually the tensordict resulting from the call to :obj:`env.reset()`) default: True skip (int): frame interval for the saved tensordict. diff --git a/torchrl/trainers/helpers/collectors.py b/torchrl/trainers/helpers/collectors.py index e60e943bf9d..4ec6056363d 100644 --- a/torchrl/trainers/helpers/collectors.py +++ b/torchrl/trainers/helpers/collectors.py @@ -373,7 +373,7 @@ class OnPolicyCollectorConfig: # If the collector device differs from the policy device (cuda:0 if available), then the # weights of the collector policy are synchronized with collector.update_policy_weights_(). pin_memory: bool = False - # if True, the data collector will call pin_memory before dispatching tensordicts onto the passing device + # if ``True``, the data collector will call pin_memory before dispatching tensordicts onto the passing device frames_per_batch: int = 1000 # number of steps executed in the environment per collection. # This value represents how many steps will the data collector execute and return in *each* diff --git a/torchrl/trainers/helpers/envs.py b/torchrl/trainers/helpers/envs.py index 3babbb3d5f7..14036cd355b 100644 --- a/torchrl/trainers/helpers/envs.py +++ b/torchrl/trainers/helpers/envs.py @@ -244,7 +244,7 @@ def transformed_env_constructor( custom_env (EnvBase, optional): if an existing environment needs to be transformed_in, it can be passed directly to this helper. `custom_env_maker` and `custom_env` are exclusive features. - return_transformed_envs (bool, optional): if True, a transformed_in environment + return_transformed_envs (bool, optional): if ``True``, a transformed_in environment is returned. 
action_dim_gsde (int, Optional): if gSDE is used, this can present the action dim to initialize the noise. Make sure this is indicated in environment executed in parallel. @@ -556,7 +556,7 @@ class EnvConfig: max_frames_per_traj: int = 1000 # Number of steps before a reset of the environment is called (if it has not been flagged as done before). batch_transform: bool = False - # if True, the transforms will be applied to the parallel env, and not to each individual env.\ + # if ``True``, the transforms will be applied to the parallel env, and not to each individual env.\ image_size: int = 84 # if True and environment has discrete action space, then it is encoded as categorical values rather than one-hot. categorical_action_encoding: bool = False diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 06338d9d41c..cbd1a66cb77 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -603,11 +603,11 @@ class ReplayBufferTrainer(TrainerHookBase): replay_buffer (TensorDictReplayBuffer): replay buffer to be used. batch_size (int): batch size when sampling data from the latest collection or from the replay buffer. - memmap (bool, optional): if True, a memmap tensordict is created. + memmap (bool, optional): if ``True``, a memmap tensordict is created. Default is False. device (device, optional): device where the samples must be placed. Default is cpu. - flatten_tensordicts (bool, optional): if True, the tensordicts will be + flatten_tensordicts (bool, optional): if ``True``, the tensordicts will be flattened (or equivalently masked with the valid mask obtained from the collector) before being passed to the replay buffer. Otherwise, no transform will be achieved other than padding (see :obj:`max_dims` arg below). @@ -792,8 +792,8 @@ class LogReward(TrainerHookBase): Args: logname (str, optional): name of the rewards to be logged. Default is :obj:`"r_training"`. - log_pbar (bool, optional): if True, the reward value will be logged on - the progression bar. Default is :obj:`False`. + log_pbar (bool, optional): if ``True``, the reward value will be logged on + the progression bar. Default is ``False``. reward_key (str or tuple, optional): the key where to find the reward in the input batch. Defaults to ``("next", "reward")`` @@ -1125,7 +1125,7 @@ class Recorder(TrainerHookBase): out_key (str, optional): reward key to set to the logger. Default is `"reward_evaluation"`. suffix (str, optional): suffix of the video to be recorded. - log_pbar (bool, optional): if True, the reward value will be logged on + log_pbar (bool, optional): if ``True``, the reward value will be logged on the progression bar. Default is `False`. """ @@ -1265,7 +1265,7 @@ class CountFramesLog(TrainerHookBase): frame_skip (int): frame skip of the environment. This argument is important to keep track of the total number of frames, not the apparent one. - log_pbar (bool, optional): if True, the reward value will be logged on + log_pbar (bool, optional): if ``True``, the reward value will be logged on the progression bar. Default is `False`. 
Examples: From acd0ec1152c740218742c5904d2e6c0fca7ba76c Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 27 Mar 2023 21:19:50 +0100 Subject: [PATCH 19/89] test --- test/test_cost.py | 58 ++++++++++++++++++++++++++++---- torchrl/objectives/ddpg.py | 2 +- torchrl/objectives/deprecated.py | 6 ++-- torchrl/objectives/dqn.py | 2 +- torchrl/objectives/dreamer.py | 9 ++--- torchrl/objectives/iql.py | 9 ++--- torchrl/objectives/redq.py | 6 ++-- torchrl/objectives/sac.py | 2 +- torchrl/objectives/td3.py | 2 +- 9 files changed, 70 insertions(+), 26 deletions(-) diff --git a/test/test_cost.py b/test/test_cost.py index e09364ca69d..4477feee0da 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -1847,7 +1847,8 @@ def _create_seq_mock_data_redq( @pytest.mark.parametrize("delay_qvalue", (True, False)) @pytest.mark.parametrize("num_qvalue", [1, 2, 4, 8]) @pytest.mark.parametrize("device", get_available_devices()) - def test_redq(self, delay_qvalue, num_qvalue, device): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_redq(self, delay_qvalue, num_qvalue, device, td_est): torch.manual_seed(self.seed) td = self._create_mock_data_redq(device=device) @@ -1862,6 +1863,12 @@ def test_redq(self, delay_qvalue, num_qvalue, device): loss_function="l2", delay_qvalue=delay_qvalue, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -2035,7 +2042,8 @@ def test_redq_shared(self, delay_qvalue, num_qvalue, device): @pytest.mark.parametrize("delay_qvalue", (True, False)) @pytest.mark.parametrize("num_qvalue", [1, 2, 4, 8]) @pytest.mark.parametrize("device", get_available_devices()) - def test_redq_batched(self, delay_qvalue, num_qvalue, device): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_redq_batched(self, delay_qvalue, num_qvalue, device, td_est): torch.manual_seed(self.seed) td = self._create_mock_data_redq(device=device) @@ -2050,6 +2058,12 @@ def test_redq_batched(self, delay_qvalue, num_qvalue, device): loss_function="l2", delay_qvalue=delay_qvalue, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) loss_class_deprec = ( REDQLoss_deprecated if not delay_qvalue else DoubleREDQLoss_deprecated @@ -2060,6 +2074,12 @@ def test_redq_batched(self, delay_qvalue, num_qvalue, device): num_qvalue_nets=num_qvalue, loss_function="l2", ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn_deprec.make_value_function(td_est) + return + if td_est is not None: + loss_fn_deprec.make_value_function(td_est) td_clone1 = td.clone() td_clone2 = td.clone() @@ -2289,7 +2309,8 @@ def _create_seq_mock_data_ppo( @pytest.mark.parametrize("gradient_mode", (True, False)) @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) - def test_ppo(self, loss_class, device, gradient_mode, advantage): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_ppo(self, loss_class, device, gradient_mode, advantage, td_est): torch.manual_seed(self.seed) td = self._create_seq_mock_data_ppo(device=device) @@ -2315,6 +2336,10 @@ def test_ppo(self, loss_class, device, gradient_mode, advantage): loss_fn = loss_class(actor, value, 
loss_critic_type="l2") if advantage is not None: advantage(td) + else: + if td_est is not None: + loss_fn.make_value_function(td_est) + loss = loss_fn(td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2560,7 +2585,8 @@ def _create_seq_mock_data_a2c( @pytest.mark.parametrize("gradient_mode", (True, False)) @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) - def test_a2c(self, device, gradient_mode, advantage): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_a2c(self, device, gradient_mode, advantage, td_est): torch.manual_seed(self.seed) td = self._create_seq_mock_data_a2c(device=device) @@ -2597,6 +2623,8 @@ def test_a2c(self, device, gradient_mode, advantage): td = td.exclude(loss_fn.value_target_key) if advantage is not None: advantage(td) + else: + loss_fn.make_value_function(td_est) loss = loss_fn(td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2696,7 +2724,8 @@ class TestReinforce: @pytest.mark.parametrize("delay_value", [True, False]) @pytest.mark.parametrize("gradient_mode", [True, False]) @pytest.mark.parametrize("advantage", ["gae", "td", "td_lambda", None]) - def test_reinforce_value_net(self, advantage, gradient_mode, delay_value): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_reinforce_value_net(self, advantage, gradient_mode, delay_value, td_est): n_obs = 3 n_act = 5 batch = 4 @@ -2758,6 +2787,8 @@ def test_reinforce_value_net(self, advantage, gradient_mode, delay_value): params = TensorDict(value_net.state_dict(), []).unflatten_keys(".") if advantage is not None: advantage(td, params=params) + else: + loss_fn.make_value_function(td_est) loss_td = loss_fn(td) autograd.grad( loss_td.get("loss_actor"), @@ -3113,7 +3144,8 @@ def test_dreamer_env(self, device, imagination_horizon, discount_loss): @pytest.mark.parametrize("imagination_horizon", [3, 5]) @pytest.mark.parametrize("discount_loss", [True, False]) - def test_dreamer_actor(self, device, imagination_horizon, discount_loss): + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + def test_dreamer_actor(self, device, imagination_horizon, discount_loss, td_est): tensordict = self._create_actor_data(2, 3, 10, 5).to(device) mb_env = self._create_mb_env(10, 5).to(device) actor_model = self._create_actor_model(10, 5).to(device) @@ -3125,6 +3157,12 @@ def test_dreamer_actor(self, device, imagination_horizon, discount_loss): imagination_horizon=imagination_horizon, discount_loss=discount_loss, ) + if td_est is ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_module.make_value_function(td_est) + return + if td_est is not None: + loss_module.make_value_function(td_est) loss_td, fake_data = loss_module(tensordict) assert not fake_data.requires_grad assert fake_data.shape == torch.Size([tensordict.numel(), imagination_horizon]) @@ -3288,12 +3326,14 @@ def _create_seq_mock_data_iql( @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("temperature", [0.0, 0.1, 1.0, 10.0]) @pytest.mark.parametrize("expectile", [0.1, 0.5, 1.0]) + @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) def test_iql( self, num_qvalue, device, temperature, expectile, + td_est, ): torch.manual_seed(self.seed) @@ -3312,6 +3352,12 @@ def test_iql( expectile=expectile, loss_function="l2", ) + if td_est is 
ValueFunctions.GAE: + with pytest.raises(NotImplementedError): + loss_fn.make_value_function(td_est) + return + if td_est is not None: + loss_fn.make_value_function(td_est) with _check_td_steady(td): loss = loss_fn(td) diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py index 980c0e3b8c9..6bb15af4052 100644 --- a/torchrl/objectives/ddpg.py +++ b/torchrl/objectives/ddpg.py @@ -23,7 +23,7 @@ from ..envs.utils import set_exploration_mode from .common import LossModule -from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import TD0Estimate, TD1Estimate, TDLambdaEstimate class DDPGLoss(LossModule): diff --git a/torchrl/objectives/deprecated.py b/torchrl/objectives/deprecated.py index 97b7aab7e5e..40cdfe1d687 100644 --- a/torchrl/objectives/deprecated.py +++ b/torchrl/objectives/deprecated.py @@ -22,7 +22,7 @@ ValueFunctions, ) from torchrl.objectives.common import LossModule -from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -289,7 +289,9 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_network=None, value_key=value_key, **hp ) elif value_type == ValueFunctions.GAE: - self._value_function = GAE(value_network=None, value_key=value_key, **hp) + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) elif value_type == ValueFunctions.TDLambda: self._value_function = TDLambdaEstimate( value_network=None, value_key=value_key, **hp diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 66ab0aba5c4..b8d385f2b8e 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -15,7 +15,7 @@ from .common import LossModule from .utils import default_value_kwargs, distance_loss, ValueFunctions -from .value import GAE, TDLambdaEstimate +from .value import TDLambdaEstimate from .value.advantages import TD0Estimate, TD1Estimate diff --git a/torchrl/objectives/dreamer.py b/torchrl/objectives/dreamer.py index 47f13e4ae90..67c66b93fa8 100644 --- a/torchrl/objectives/dreamer.py +++ b/torchrl/objectives/dreamer.py @@ -17,7 +17,7 @@ hold_out_net, ValueFunctions, ) -from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate class DreamerModelLoss(LossModule): @@ -240,11 +240,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key=value_key, ) elif value_type is ValueFunctions.GAE: - self._value_function = GAE( - **hp, - value_network=value_net, - value_target_key="value_target", - value_key=value_key, + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." 
) elif value_type is ValueFunctions.TDLambda: self._value_function = TDLambdaEstimate( diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py index f817af1b904..4da4027d5b8 100644 --- a/torchrl/objectives/iql.py +++ b/torchrl/objectives/iql.py @@ -15,7 +15,7 @@ from ..envs.utils import set_exploration_mode from .common import LossModule -from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -255,11 +255,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key=value_key, ) elif value_type is ValueFunctions.GAE: - self._value_function = GAE( - **hp, - value_network=value_net, - value_target_key="value_target", - value_key=value_key, + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type is ValueFunctions.TDLambda: self._value_function = TDLambdaEstimate( diff --git a/torchrl/objectives/redq.py b/torchrl/objectives/redq.py index a48e0d78580..1a9ecb45955 100644 --- a/torchrl/objectives/redq.py +++ b/torchrl/objectives/redq.py @@ -17,7 +17,7 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions -from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap @@ -327,7 +327,9 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_network=None, value_key=value_key, **hp ) elif value_type == ValueFunctions.GAE: - self._value_function = GAE(value_network=None, value_key=value_key, **hp) + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." 
+ ) elif value_type == ValueFunctions.TDLambda: self._value_function = TDLambdaEstimate( value_network=None, value_key=value_key, **hp diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 5a8bdb25aef..ad7263c2fda 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -19,7 +19,7 @@ from ..envs.utils import set_exploration_mode, step_mdp from .common import LossModule -from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap diff --git a/torchrl/objectives/td3.py b/torchrl/objectives/td3.py index 61dcdbbfcb1..b6c96a253e4 100644 --- a/torchrl/objectives/td3.py +++ b/torchrl/objectives/td3.py @@ -12,7 +12,7 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions -from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: from functorch import vmap From 7855ef6ab73f3784de68aeb998f995e1af22056c Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 09:05:57 +0100 Subject: [PATCH 20/89] smooth deprecation --- test/test_cost.py | 4 ++-- torchrl/objectives/a2c.py | 15 +++++++++++++-- torchrl/objectives/ddpg.py | 9 +++++++++ torchrl/objectives/deprecated.py | 10 +++++++++- torchrl/objectives/dqn.py | 16 ++++++++++++++-- torchrl/objectives/dreamer.py | 23 ++++++++++++++++++++--- torchrl/objectives/iql.py | 15 +++++++++++++-- torchrl/objectives/ppo.py | 19 +++++++++++++++++-- torchrl/objectives/redq.py | 15 +++++++++++++-- torchrl/objectives/reinforce.py | 15 +++++++++++++-- torchrl/objectives/sac.py | 15 +++++++++++++-- torchrl/objectives/td3.py | 15 +++++++++++++-- torchrl/objectives/utils.py | 6 ++++++ 13 files changed, 155 insertions(+), 22 deletions(-) diff --git a/test/test_cost.py b/test/test_cost.py index 4477feee0da..dc763f37f66 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -2623,7 +2623,7 @@ def test_a2c(self, device, gradient_mode, advantage, td_est): td = td.exclude(loss_fn.value_target_key) if advantage is not None: advantage(td) - else: + elif td_est is not None: loss_fn.make_value_function(td_est) loss = loss_fn(td) loss_critic = loss["loss_critic"] @@ -2787,7 +2787,7 @@ def test_reinforce_value_net(self, advantage, gradient_mode, delay_value, td_est params = TensorDict(value_net.state_dict(), []).unflatten_keys(".") if advantage is not None: advantage(td, params=params) - else: + elif td_est is not None: loss_fn.make_value_function(td_est) loss_td = loss_fn(td) autograd.grad( diff --git a/torchrl/objectives/a2c.py b/torchrl/objectives/a2c.py index 644324416f3..a8bdade55b6 100644 --- a/torchrl/objectives/a2c.py +++ b/torchrl/objectives/a2c.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
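The user-facing effect of the estimator hunks above can be sketched as follows; ``actor`` and ``qvalue`` stand for already-built modules, as in the tests, and the names are the ones used at this point of the series (they are renamed to ``ValueEstimators``/``make_value_estimator`` a few patches further down):

    >>> from torchrl.objectives import REDQLoss
    >>> from torchrl.objectives.utils import ValueFunctions
    >>> loss_fn = REDQLoss(actor_network=actor, qvalue_network=qvalue, loss_function="l2")
    >>> loss_fn.make_value_function(ValueFunctions.TDLambda, gamma=0.99, lmbda=0.95)  # supported
    >>> loss_fn.make_value_function(ValueFunctions.GAE)  # now raises NotImplementedError for this loss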
- +import warnings from typing import Tuple import torch @@ -11,7 +11,12 @@ from torch import distributions as d from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate @@ -72,6 +77,7 @@ def __init__( entropy_coef: float = 0.01, critic_coef: float = 1.0, loss_critic_type: str = "smooth_l1", + gamma: float = None, ): super().__init__() self.convert_to_functional( @@ -88,6 +94,9 @@ def __init__( self.register_buffer( "critic_coef", torch.tensor(critic_coef, device=self.device) ) + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma self.loss_critic_type = loss_critic_type def reset(self) -> None: @@ -165,6 +174,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma value_key = "state_value" if value_type == ValueFunctions.TD1: self._value_function = TD1Estimate( diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py index 6bb15af4052..f8c6e83a621 100644 --- a/torchrl/objectives/ddpg.py +++ b/torchrl/objectives/ddpg.py @@ -5,6 +5,7 @@ from __future__ import annotations +import warnings from copy import deepcopy from typing import Tuple @@ -15,6 +16,7 @@ from torchrl.modules.tensordict_module.actors import ActorCriticWrapper from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, hold_out_params, @@ -49,6 +51,7 @@ def __init__( loss_function: str = "l2", delay_actor: bool = False, delay_value: bool = False, + gamma: float = None, ) -> None: super().__init__() self.delay_actor = delay_actor @@ -78,6 +81,10 @@ def __init__( self.loss_funtion = loss_function + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma + def forward(self, input_tensordict: TensorDictBase) -> TensorDict: """Computes the DDPG losses given a tensordict sampled from the replay buffer. @@ -175,6 +182,8 @@ def _loss_value( def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_action_value" if value_type == ValueFunctions.TD1: diff --git a/torchrl/objectives/deprecated.py b/torchrl/objectives/deprecated.py index 40cdfe1d687..0a518bff304 100644 --- a/torchrl/objectives/deprecated.py +++ b/torchrl/objectives/deprecated.py @@ -2,8 +2,8 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
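A minimal sketch of the deprecation path introduced above, with hypothetical ``actor`` and ``value`` modules:

    >>> from torchrl.objectives import DDPGLoss
    >>> from torchrl.objectives.utils import ValueFunctions
    >>> loss = DDPGLoss(actor, value, gamma=0.99)     # triggers _GAMMA_LMBDA_DEPREC_WARNING
    >>> loss.make_value_function(ValueFunctions.TD0)  # the stored gamma is re-injected here
    >>> # non-deprecated form: pass gamma to the value-function constructor instead
    >>> loss = DDPGLoss(actor, value)
    >>> loss.make_value_function(ValueFunctions.TD0, gamma=0.99)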
- import math +import warnings from numbers import Number from typing import Tuple, Union @@ -22,6 +22,7 @@ ValueFunctions, ) from torchrl.objectives.common import LossModule +from torchrl.objectives.utils import _GAMMA_LMBDA_DEPREC_WARNING from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: @@ -94,6 +95,7 @@ def __init__( target_entropy: Union[str, Number] = "auto", delay_qvalue: bool = True, gSDE: bool = False, + gamma: float = None, ): if not _has_functorch: raise ImportError("Failed to import functorch.") from FUNCTORCH_ERR @@ -156,6 +158,10 @@ def __init__( ) self.gSDE = gSDE + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma + @property def alpha(self): # keep alpha is a reasonable range @@ -277,6 +283,8 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" # we do not need a value network bc the next state value is already passed diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index b8d385f2b8e..01dbcc98182 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - +import warnings from typing import Union import torch @@ -14,7 +14,12 @@ from torchrl.modules.tensordict_module.common import ensure_tensordict_compatible from .common import LossModule -from .utils import default_value_kwargs, distance_loss, ValueFunctions +from .utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from .value import TDLambdaEstimate from .value.advantages import TD0Estimate, TD1Estimate @@ -39,6 +44,7 @@ def __init__( loss_function: str = "l2", priority_key: str = "td_error", delay_value: bool = False, + gamma: float = None, ) -> None: super().__init__() @@ -59,8 +65,14 @@ def __init__( self.priority_key = priority_key self.action_space = self.value_network.action_space + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma + def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) if value_type is ValueFunctions.TD1: self._value_function = TD1Estimate( diff --git a/torchrl/objectives/dreamer.py b/torchrl/objectives/dreamer.py index 67c66b93fa8..146686ed03c 100644 --- a/torchrl/objectives/dreamer.py +++ b/torchrl/objectives/dreamer.py @@ -2,6 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
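The DQN counterpart follows the same recipe; the sketch below (with an assumed Q-value ``actor``) also shows that hyperparameters passed to ``make_value_function`` are merged with the stored ``gamma``:

    >>> from torchrl.objectives import DQNLoss
    >>> from torchrl.objectives.utils import ValueFunctions
    >>> loss_fn = DQNLoss(actor, loss_function="l2", gamma=0.99)  # deprecation warning
    >>> loss_fn.make_value_function(ValueFunctions.TDLambda, lmbda=0.95)
    >>> # gamma comes from the (deprecated) constructor argument, lmbda from the call above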
+import warnings from typing import Optional, Tuple import torch @@ -12,6 +13,7 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, hold_out_net, @@ -163,6 +165,8 @@ def __init__( *, imagination_horizon: int = 15, discount_loss: bool = False, # for consistency with paper + gamma: int = None, + lmbda: int = None, ): super().__init__() self.actor_model = actor_model @@ -170,6 +174,12 @@ def __init__( self.model_based_env = model_based_env self.imagination_horizon = imagination_horizon self.discount_loss = discount_loss + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma + if lmbda is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.lmbda = lmbda def forward(self, tensordict: TensorDict) -> Tuple[TensorDict, TensorDict]: with torch.no_grad(): @@ -224,6 +234,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_net = None value_key = "state_value" hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) if value_type is ValueFunctions.TD1: self._value_function = TD1Estimate( @@ -240,10 +252,14 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key=value_key, ) elif value_type is ValueFunctions.GAE: + if hasattr(self, "lmbda"): + hp["lmbda"] = self.lmbda raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type is ValueFunctions.TDLambda: + if hasattr(self, "lmbda"): + hp["lmbda"] = self.lmbda self._value_function = TDLambdaEstimate( **hp, value_network=value_net, @@ -264,10 +280,11 @@ class DreamerValueLoss(LossModule): Args: value_model (TensorDictModule): the value model. - value_loss (str, optional): the loss to use for the value loss. Default: "l2". - gamma (float, optional): the gamma discount factor. Default: 0.99. + value_loss (str, optional): the loss to use for the value loss. + Default: ``"l2"``. discount_loss (bool, optional): if ``True``, the loss is discounted with a gamma discount factor. Default: False. + gamma (float, optional): the gamma discount factor. Default: ``0.99``. """ @@ -275,8 +292,8 @@ def __init__( self, value_model: TensorDictModule, value_loss: Optional[str] = None, - gamma: int = 0.99, discount_loss: bool = False, # for consistency with paper + gamma: int = 0.99, ): super().__init__() self.value_model = value_model diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py index 4da4027d5b8..26d7c1a8c2f 100644 --- a/torchrl/objectives/iql.py +++ b/torchrl/objectives/iql.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
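For the Dreamer actor loss both ``gamma`` and ``lmbda`` get the same treatment; a sketch, assuming ``actor_model``, ``value_model`` and ``model_based_env`` are built as in the tests and that the class is the existing ``DreamerActorLoss``:

    >>> from torchrl.objectives.dreamer import DreamerActorLoss
    >>> from torchrl.objectives.utils import ValueFunctions
    >>> loss = DreamerActorLoss(
    ...     actor_model, value_model, model_based_env, gamma=0.99, lmbda=0.95
    ... )  # warns once per deprecated argument
    >>> loss.make_value_function(ValueFunctions.TDLambda)  # stored gamma and lmbda are re-used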
- +import warnings from typing import Optional, Tuple import torch @@ -11,7 +11,12 @@ from torch import Tensor from torchrl.modules import ProbabilisticActor -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from ..envs.utils import set_exploration_mode from .common import LossModule @@ -65,6 +70,7 @@ def __init__( loss_function: str = "smooth_l1", temperature: float = 1.0, expectile: float = 0.5, + gamma: float = None, ) -> None: if not _has_functorch: raise ImportError("Failed to import functorch.") from FUNCTORCH_ERROR @@ -105,6 +111,9 @@ def __init__( self.priority_key = priority_key self.loss_function = loss_function + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma @property def device(self) -> torch.device: @@ -239,6 +248,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key = "state_value" hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) if value_type is ValueFunctions.TD1: self._value_function = TD1Estimate( diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py index 477cf3f5765..3bb97d9a371 100644 --- a/torchrl/objectives/ppo.py +++ b/torchrl/objectives/ppo.py @@ -2,8 +2,8 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - import math +import warnings from typing import Tuple import torch @@ -11,7 +11,12 @@ from tensordict.tensordict import TensorDict, TensorDictBase from torch import distributions as d -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from .common import LossModule from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate @@ -101,6 +106,7 @@ def __init__( critic_coef: float = 1.0, loss_critic_type: str = "smooth_l1", normalize_advantage: bool = False, + gamma: float = None, ): super().__init__() self.convert_to_functional( @@ -121,6 +127,9 @@ def __init__( ) self.loss_critic_type = loss_critic_type self.normalize_advantage = normalize_advantage + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma def reset(self) -> None: pass @@ -206,6 +215,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" if value_type == ValueFunctions.TD1: @@ -300,6 +311,7 @@ def __init__( critic_coef: float = 1.0, loss_critic_type: str = "smooth_l1", normalize_advantage: bool = True, + gamma: float = None, **kwargs, ): super(ClipPPOLoss, self).__init__( @@ -312,6 +324,7 @@ def __init__( critic_coef=critic_coef, loss_critic_type=loss_critic_type, normalize_advantage=normalize_advantage, + gamma=gamma, **kwargs, ) self.register_buffer("clip_epsilon", torch.tensor(clip_epsilon)) @@ -457,6 +470,7 @@ def __init__( critic_coef: float = 1.0, loss_critic_type: str = "smooth_l1", normalize_advantage: bool = True, + gamma: float = None, **kwargs, ): super(KLPENPPOLoss, self).__init__( @@ -469,6 +483,7 @@ def __init__( 
critic_coef=critic_coef, loss_critic_type=loss_critic_type, normalize_advantage=normalize_advantage, + gamma=gamma, **kwargs, ) diff --git a/torchrl/objectives/redq.py b/torchrl/objectives/redq.py index 1a9ecb45955..fe717c77cd7 100644 --- a/torchrl/objectives/redq.py +++ b/torchrl/objectives/redq.py @@ -2,8 +2,8 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - import math +import warnings from numbers import Number from typing import Union @@ -16,7 +16,12 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: @@ -89,6 +94,7 @@ def __init__( target_entropy: Union[str, Number] = "auto", delay_qvalue: bool = True, gSDE: bool = False, + gamma: float = None, ): if not _has_functorch: raise ImportError("Failed to import functorch.") from FUNCTORCH_ERR @@ -152,6 +158,9 @@ def __init__( "target_entropy", torch.tensor(target_entropy, device=device) ) self.gSDE = gSDE + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma @property def alpha(self): @@ -315,6 +324,8 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" # we do not need a value network bc the next state value is already passed diff --git a/torchrl/objectives/reinforce.py b/torchrl/objectives/reinforce.py index 1079c4eb2be..2c972291455 100644 --- a/torchrl/objectives/reinforce.py +++ b/torchrl/objectives/reinforce.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
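The PPO family differs from the Q-value losses above in that GAE remains available; the constructor ``gamma`` is only kept as a deprecated convenience (sketch with assumed ``actor`` and ``critic`` modules):

    >>> from torchrl.objectives import ClipPPOLoss
    >>> from torchrl.objectives.utils import ValueFunctions
    >>> loss = ClipPPOLoss(actor, critic, gamma=0.99)             # deprecation warning
    >>> loss.make_value_function(ValueFunctions.GAE, lmbda=0.95)  # GAE is still supported here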
- +import warnings from typing import Optional import torch @@ -10,7 +10,12 @@ from tensordict.nn import ProbabilisticTensorDictSequential, TensorDictModule from tensordict.tensordict import TensorDict, TensorDictBase from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate @@ -72,6 +77,7 @@ def __init__( advantage_key: str = "advantage", value_target_key: str = "value_target", loss_critic_type: str = "smooth_l1", + gamma: float = None, ) -> None: super().__init__() @@ -95,6 +101,9 @@ def __init__( create_target_params=self.delay_value, compare_against=list(actor.parameters()), ) + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma def forward(self, tensordict: TensorDictBase) -> TensorDictBase: advantage = tensordict.get(self.advantage_key, None) @@ -146,6 +155,8 @@ def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" if value_type == ValueFunctions.TD1: diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index ad7263c2fda..f24d1f13b3c 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -2,8 +2,8 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - import math +import warnings from numbers import Number from typing import Optional, Tuple, Union @@ -15,7 +15,12 @@ from torchrl.modules import ProbabilisticActor from torchrl.modules.tensordict_module.actors import ActorCriticWrapper -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from ..envs.utils import set_exploration_mode, step_mdp from .common import LossModule @@ -97,6 +102,7 @@ def __init__( delay_actor: bool = False, delay_qvalue: bool = False, delay_value: bool = False, + gamma: float = None, ) -> None: if not _has_functorch: raise ImportError("Failed to import functorch.") from FUNCTORCH_ERROR @@ -179,6 +185,9 @@ def __init__( self.actor_network, self.value_network ) make_functional(self.actor_critic) + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma def make_value_function(self, value_type: ValueFunctions, **hyperparams): if self._version == 1: @@ -725,6 +734,8 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key = "state_value" hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma if value_type is ValueFunctions.TD1: self._value_function = TD1Estimate( **hp, diff --git a/torchrl/objectives/td3.py b/torchrl/objectives/td3.py index b6c96a253e4..e3c56dbb41a 100644 --- a/torchrl/objectives/td3.py +++ b/torchrl/objectives/td3.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
- +import warnings import torch from tensordict.nn import TensorDictModule @@ -11,7 +11,12 @@ from torchrl.envs.utils import set_exploration_mode, step_mdp from torchrl.objectives.common import LossModule -from torchrl.objectives.utils import default_value_kwargs, distance_loss, ValueFunctions +from torchrl.objectives.utils import ( + _GAMMA_LMBDA_DEPREC_WARNING, + default_value_kwargs, + distance_loss, + ValueFunctions, +) from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate try: @@ -57,6 +62,7 @@ def __init__( loss_function: str = "smooth_l1", delay_actor: bool = False, delay_qvalue: bool = False, + gamma: float = None, ) -> None: if not _has_functorch: raise ImportError( @@ -88,6 +94,9 @@ def __init__( self.policy_noise = policy_noise self.noise_clip = noise_clip self.max_action = actor_network.spec["action"].space.maximum.max().item() + if gamma is not None: + warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) + self.gamma = gamma def forward(self, tensordict: TensorDictBase) -> TensorDictBase: obs_keys = self.actor_network.in_keys @@ -214,6 +223,8 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: def make_value_function(self, value_type: ValueFunctions, **hyperparams): hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_action_value" # we do not need a value network bc the next state value is already passed diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index bcdc38434ab..09edf449e08 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -15,6 +15,12 @@ from torchrl.envs.utils import step_mdp +_GAMMA_LMBDA_DEPREC_WARNING = ( + "Passing gamma / lambda parameters through the loss constructor " + "is deprecated and will be removed soon. To customize your value function, " + "run `loss_module.make_value_function(ValueFunctions., gamma=val)`." +) + class ValueFunctions(Enum): """Value function enumerator for custom-built estimators. From 55361cc26681d0e76ae0661b458df9929b5f8b29 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 09:16:06 +0100 Subject: [PATCH 21/89] amend --- torchrl/objectives/common.py | 4 ++-- torchrl/objectives/td3.py | 28 ++++++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 1cbddfb4deb..9f1605ebb4d 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -54,7 +54,7 @@ class LossModule(nn.Module): The value estimator can be changed using the :meth:`~.make_value_function` method. """ - default_value_type: ValueFunctions = None + default_value_function: ValueFunctions = None def __init__(self): super().__init__() @@ -383,7 +383,7 @@ def _default_value_function(self): from :obj:`torchrl.objectives.utils.DEFAULT_VALUE_FUN_PARAMS`. """ - self.make_value_function(self.default_value_type) + self.make_value_function(self.default_value_function) def make_value_function(self, value_type: ValueFunctions, **hyperparams): """Value-function constructor. diff --git a/torchrl/objectives/td3.py b/torchrl/objectives/td3.py index e3c56dbb41a..1ef911a8891 100644 --- a/torchrl/objectives/td3.py +++ b/torchrl/objectives/td3.py @@ -34,21 +34,29 @@ class TD3Loss(LossModule): Args: actor_network (TensorDictModule): the actor to be trained - qvalue_network (TensorDictModule): a single Q-value network that will be multiplicated as many times as needed. 
- num_qvalue_nets (int, optional): Number of Q-value networks to be trained. Default is 10. - policy_noise (float, optional): Standard deviation for the target policy action noise. Default is 0.2. - noise_clip (float, optional): Clipping range value for the sampled target policy action noise. Default is 0.5. - priority_key (str, optional): Key where to write the priority value for prioritized replay buffers. Default is + qvalue_network (TensorDictModule): a single Q-value network that will + be multiplicated as many times as needed. + num_qvalue_nets (int, optional): Number of Q-value networks to be + trained. Default is ``10``. + policy_noise (float, optional): Standard deviation for the target + policy action noise. Default is ``0.2``. + noise_clip (float, optional): Clipping range value for the sampled + target policy action noise. Default is ``0.5``. + priority_key (str, optional): Key where to write the priority value + for prioritized replay buffers. Default is `"td_error"`. - loss_function (str, optional): loss function to be used for the Q-value. Can be one of `"smooth_l1"`, "l2", - "l1", Default is "smooth_l1". - delay_actor (bool, optional): whether to separate the target actor networks from the actor networks used for + loss_function (str, optional): loss function to be used for the Q-value. + Can be one of ``"smooth_l1"``, ``"l2"``, + ``"l1"``, Default is ``"smooth_l1"``. + delay_actor (bool, optional): whether to separate the target actor + networks from the actor networks used for data collection. Default is ``False``. - delay_qvalue (bool, optional): Whether to separate the target Q value networks from the Q value networks used + delay_qvalue (bool, optional): Whether to separate the target Q value + networks from the Q value networks used for data collection. Default is ``False``. """ - default_value_type = ValueFunctions.TD0 + default_value_function = ValueFunctions.TD0 def __init__( self, From fd874576798ea91a6fdb6aef73da2e5f97ceb01a Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 11:51:25 +0100 Subject: [PATCH 22/89] amend --- docs/source/index.rst | 2 +- docs/source/reference/objectives.rst | 83 ++++++++++++++- test/test_cost.py | 138 ++++++++++++------------- test/test_modules.py | 4 +- torchrl/modules/planners/mppi.py | 4 +- torchrl/objectives/__init__.py | 2 +- torchrl/objectives/a2c.py | 24 ++--- torchrl/objectives/common.py | 32 +++--- torchrl/objectives/ddpg.py | 22 ++-- torchrl/objectives/deprecated.py | 22 ++-- torchrl/objectives/dqn.py | 38 +++---- torchrl/objectives/dreamer.py | 22 ++-- torchrl/objectives/iql.py | 22 ++-- torchrl/objectives/ppo.py | 34 +++--- torchrl/objectives/redq.py | 22 ++-- torchrl/objectives/reinforce.py | 26 ++--- torchrl/objectives/sac.py | 40 +++---- torchrl/objectives/td3.py | 22 ++-- torchrl/objectives/utils.py | 18 ++-- torchrl/objectives/value/__init__.py | 5 +- torchrl/objectives/value/advantages.py | 33 ++++-- 21 files changed, 356 insertions(+), 259 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index c8463c53909..d379b418298 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -73,7 +73,7 @@ Knowledge Base ============== .. 
toctree:: - :maxdepth: 1 + :maxdepth: 2 reference/knowledge_base diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index ff178fba548..01a7379e298 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -3,6 +3,81 @@ torchrl.objectives package ========================== +TorchRL provides a series of losses to use in your training scripts. +The aim is to have losses that are easily reusable/swappable and that have +a simple signature. + +The main characteristics of TorchRL losses are: + +- they are stateful objects: they contain a copy of the trainable parameters + such that ``loss_module.parameters()`` gives whatever is needed to train the + algorithm. +- They follow the ``tensordict`` convention: the :meth:`torch.nn.Module.forward` + method will receive a tensordict as input that contains all the necessary + information to return a loss value. +- They output a :class:`tensordict.TensorDict` instance with the loss values + written under a ``"loss_`` where ``smth`` is a string describing the + loss. Additional keys in the tensordict may be useful metrics to log during + training time. + .. note:: + The reason we return independent losses is to let the user use a different + optimizer for different sets of parameters for instance. Summing the losses + can be simply done via ``sum(loss for key, loss in loss_vals.items() if key.startswith("loss_")``. + +Training value functions +------------------------ + +TorchRL provides a range of **value estimators** such as TD(0), TD(1), TD(:math:`\lambda`) +and GAE. +In a nutshell, a value estimator is a function of data (mostly +rewards and done states) and a state value (ie. the value +returned by a function that is fit to estimate state-values). +To learn more about value estimators, check the introduction to RL from `Sutton +and Barto `_, +in particular the chapters about value iteration and TD learning. +It gives a somewhat biased estimation of the discounted return following a state +or a state-action pair based on data and proxy maps. These estimators are +used in two contexts: + +- To train the value network to learn the "true" state value (or state-action + value) map, one needs a target value to fit it to. The better (less bias, + less variance) the estimator, the better the value network will be, which in + turn can speed up the policy training significantly. Typically, the value + network loss will look like: + + >>> value = value_network(states) + >>> target_value = value_estimator(rewards, done, value_network(next_state)) + >>> value_net_loss = (value - target_value).pow(2).mean() + +- Computing an "advantage" signal for policy-optimization. The advantage is + the delta between the value estimate (from the estimator, ie from "real" data) + and the output of the value network (ie the proxy to this value). A positive + advantage can be seen as a signal that the policy actually performed better + than expected, thereby signaling that there is room for improvement if that + trajectory is to be taken as example. Conversely, a negative advantage signifies + that the policy underperformed compared to what was to be expected. + +Thins are not always as easy as in the example above and the formula to compute +the value estimator or the advantage may be slightly more intricate than this. +To help users flexibly use one or another value estimator, we provide a simple +API to change it on-the-fly. 
Here is an example with DQN, but all modules will +follow a similar structure: + + >>> from torchrl.objectives import DQNLoss, ValueEstimators + >>> loss_module = DQNLoss(actor) + >>> kwargs = {"gamma": 0.9, "lmbda": 0.9} + >>> loss_module.make_value_estimator(ValueEstimators.TDLambda, **kwargs) + +The :class:`torchrl.objectives.ValueEstimators` class enumerates the value +estimators to choose from. This makes it easy for the users to rely on +auto-completion to make their choice. + +.. autosummary:: + :toctree: generated/ + :template: rl_template_noinherit.rst + + LossModule + DQN --- @@ -108,10 +183,10 @@ Returns :toctree: generated/ :template: rl_template_noinherit.rst - ValueFunctionBase - TD0Estimate - TD1Estimate - TDLambdaEstimate + ValueEstimatorBase + TD0Estimator + TD1Estimator + TDLambdaEstimator GAE functional.generalized_advantage_estimate functional.vec_generalized_advantage_estimate diff --git a/test/test_cost.py b/test/test_cost.py index dc763f37f66..0aa87bcb2c4 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -91,9 +91,9 @@ HardUpdate, hold_out_net, SoftUpdate, - ValueFunctions, + ValueEstimators, ) -from torchrl.objectives.value.advantages import GAE, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value.advantages import GAE, TD1Estimator, TDLambdaEstimator from torchrl.objectives.value.functional import ( generalized_advantage_estimate, td_advantage_estimate, @@ -299,7 +299,7 @@ def _create_seq_mock_data_dqn( @pytest.mark.parametrize( "action_spec_type", ("nd_bounded", "one_hot", "categorical") ) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_dqn(self, delay_value, device, action_spec_type, td_est): torch.manual_seed(self.seed) actor = self._create_mock_actor( @@ -309,12 +309,12 @@ def test_dqn(self, delay_value, device, action_spec_type, td_est): action_spec_type=action_spec_type, device=device ) loss_fn = DQNLoss(actor, loss_function="l2", delay_value=delay_value) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) assert loss_fn.priority_key in td.keys() @@ -399,7 +399,7 @@ def test_dqn_batcher(self, n, delay_value, device, action_spec_type, gamma=0.9): @pytest.mark.parametrize( "action_spec_type", ("mult_one_hot", "one_hot", "categorical") ) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_distributional_dqn( self, atoms, delay_value, device, action_spec_type, td_est, gamma=0.9 ): @@ -413,12 +413,12 @@ def test_distributional_dqn( ).to(device) loss_fn = DistributionalDQNLoss(actor, gamma=gamma, delay_value=delay_value) - if td_est not in (None, ValueFunctions.TD0): + if td_est not in (None, ValueEstimators.TD0): with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return elif td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -548,7 +548,7 @@ def _create_seq_mock_data_ddpg( ) @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("delay_actor,delay_value", [(False, False), (True, True)]) 
- @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_ddpg(self, delay_actor, delay_value, device, td_est): torch.manual_seed(self.seed) actor = self._create_mock_actor(device=device) @@ -561,12 +561,12 @@ def test_ddpg(self, delay_actor, delay_value, device, td_est): delay_actor=delay_actor, delay_value=delay_value, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -788,7 +788,7 @@ def _create_seq_mock_data_td3( ) @pytest.mark.parametrize("policy_noise", [0.1, 1.0]) @pytest.mark.parametrize("noise_clip", [0.1, 1.0]) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_td3( self, delay_actor, @@ -811,12 +811,12 @@ def test_td3( delay_actor=delay_actor, delay_qvalue=delay_qvalue, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -1074,7 +1074,7 @@ def _create_seq_mock_data_sac( @pytest.mark.parametrize("delay_qvalue", (True, False)) @pytest.mark.parametrize("num_qvalue", [1, 2, 4, 8]) @pytest.mark.parametrize("device", get_available_devices()) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_sac( self, delay_value, @@ -1115,12 +1115,12 @@ def test_sac( **kwargs, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -1491,7 +1491,7 @@ def _create_seq_mock_data_sac( @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("target_entropy_weight", [0.01, 0.5, 0.99]) @pytest.mark.parametrize("target_entropy", ["auto", 1.0, 0.1, 0.0]) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_discrete_sac( self, delay_qvalue, @@ -1522,12 +1522,12 @@ def test_discrete_sac( loss_function="l2", **kwargs, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -1847,7 +1847,7 @@ def _create_seq_mock_data_redq( @pytest.mark.parametrize("delay_qvalue", (True, False)) @pytest.mark.parametrize("num_qvalue", [1, 2, 4, 8]) @pytest.mark.parametrize("device", get_available_devices()) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_redq(self, delay_qvalue, num_qvalue, 
device, td_est): torch.manual_seed(self.seed) @@ -1863,12 +1863,12 @@ def test_redq(self, delay_qvalue, num_qvalue, device, td_est): loss_function="l2", delay_qvalue=delay_qvalue, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -2042,7 +2042,7 @@ def test_redq_shared(self, delay_qvalue, num_qvalue, device): @pytest.mark.parametrize("delay_qvalue", (True, False)) @pytest.mark.parametrize("num_qvalue", [1, 2, 4, 8]) @pytest.mark.parametrize("device", get_available_devices()) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_redq_batched(self, delay_qvalue, num_qvalue, device, td_est): torch.manual_seed(self.seed) @@ -2058,12 +2058,12 @@ def test_redq_batched(self, delay_qvalue, num_qvalue, device, td_est): loss_function="l2", delay_qvalue=delay_qvalue, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) loss_class_deprec = ( REDQLoss_deprecated if not delay_qvalue else DoubleREDQLoss_deprecated @@ -2074,12 +2074,12 @@ def test_redq_batched(self, delay_qvalue, num_qvalue, device, td_est): num_qvalue_nets=num_qvalue, loss_function="l2", ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn_deprec.make_value_function(td_est) + loss_fn_deprec.make_value_estimator(td_est) return if td_est is not None: - loss_fn_deprec.make_value_function(td_est) + loss_fn_deprec.make_value_estimator(td_est) td_clone1 = td.clone() td_clone2 = td.clone() @@ -2309,7 +2309,7 @@ def _create_seq_mock_data_ppo( @pytest.mark.parametrize("gradient_mode", (True, False)) @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_ppo(self, loss_class, device, gradient_mode, advantage, td_est): torch.manual_seed(self.seed) td = self._create_seq_mock_data_ppo(device=device) @@ -2321,11 +2321,11 @@ def test_ppo(self, loss_class, device, gradient_mode, advantage, td_est): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TD1Estimate( + advantage = TD1Estimator( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": - advantage = TDLambdaEstimate( + advantage = TDLambdaEstimator( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage is None: @@ -2338,7 +2338,7 @@ def test_ppo(self, loss_class, device, gradient_mode, advantage, td_est): advantage(td) else: if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) loss = loss_fn(td) loss_critic = loss["loss_critic"] @@ -2387,12 +2387,12 @@ def test_ppo_shared(self, loss_class, device, advantage): value_network=value, ) elif advantage == "td": - advantage = TD1Estimate( + advantage = 
TD1Estimator( gamma=0.9, value_network=value, ) elif advantage == "td_lambda": - advantage = TDLambdaEstimate( + advantage = TDLambdaEstimator( gamma=0.9, lmbda=0.9, value_network=value, @@ -2461,11 +2461,11 @@ def test_ppo_diff(self, loss_class, device, gradient_mode, advantage): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TD1Estimate( + advantage = TD1Estimator( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": - advantage = TDLambdaEstimate( + advantage = TDLambdaEstimator( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage is None: @@ -2585,7 +2585,7 @@ def _create_seq_mock_data_a2c( @pytest.mark.parametrize("gradient_mode", (True, False)) @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda", None)) @pytest.mark.parametrize("device", get_available_devices()) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_a2c(self, device, gradient_mode, advantage, td_est): torch.manual_seed(self.seed) td = self._create_seq_mock_data_a2c(device=device) @@ -2597,11 +2597,11 @@ def test_a2c(self, device, gradient_mode, advantage, td_est): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TD1Estimate( + advantage = TD1Estimator( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": - advantage = TDLambdaEstimate( + advantage = TDLambdaEstimator( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage is None: @@ -2624,7 +2624,7 @@ def test_a2c(self, device, gradient_mode, advantage, td_est): if advantage is not None: advantage(td) elif td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) loss = loss_fn(td) loss_critic = loss["loss_critic"] loss_objective = loss["loss_objective"] + loss.get("loss_entropy", 0.0) @@ -2673,11 +2673,11 @@ def test_a2c_diff(self, device, gradient_mode, advantage): gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td": - advantage = TD1Estimate( + advantage = TD1Estimator( gamma=0.9, value_network=value, differentiable=gradient_mode ) elif advantage == "td_lambda": - advantage = TDLambdaEstimate( + advantage = TDLambdaEstimator( gamma=0.9, lmbda=0.9, value_network=value, differentiable=gradient_mode ) elif advantage is None: @@ -2724,7 +2724,7 @@ class TestReinforce: @pytest.mark.parametrize("delay_value", [True, False]) @pytest.mark.parametrize("gradient_mode", [True, False]) @pytest.mark.parametrize("advantage", ["gae", "td", "td_lambda", None]) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_reinforce_value_net(self, advantage, gradient_mode, delay_value, td_est): n_obs = 3 n_act = 5 @@ -2748,13 +2748,13 @@ def test_reinforce_value_net(self, advantage, gradient_mode, delay_value, td_est differentiable=gradient_mode, ) elif advantage == "td": - advantage = TD1Estimate( + advantage = TD1Estimator( gamma=gamma, value_network=get_functional(value_net), differentiable=gradient_mode, ) elif advantage == "td_lambda": - advantage = TDLambdaEstimate( + advantage = TDLambdaEstimator( gamma=0.9, lmbda=0.9, value_network=get_functional(value_net), @@ -2788,7 +2788,7 @@ def test_reinforce_value_net(self, advantage, 
gradient_mode, delay_value, td_est if advantage is not None: advantage(td, params=params) elif td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) loss_td = loss_fn(td) autograd.grad( loss_td.get("loss_actor"), @@ -3144,7 +3144,7 @@ def test_dreamer_env(self, device, imagination_horizon, discount_loss): @pytest.mark.parametrize("imagination_horizon", [3, 5]) @pytest.mark.parametrize("discount_loss", [True, False]) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_dreamer_actor(self, device, imagination_horizon, discount_loss, td_est): tensordict = self._create_actor_data(2, 3, 10, 5).to(device) mb_env = self._create_mb_env(10, 5).to(device) @@ -3157,12 +3157,12 @@ def test_dreamer_actor(self, device, imagination_horizon, discount_loss, td_est) imagination_horizon=imagination_horizon, discount_loss=discount_loss, ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_module.make_value_function(td_est) + loss_module.make_value_estimator(td_est) return if td_est is not None: - loss_module.make_value_function(td_est) + loss_module.make_value_estimator(td_est) loss_td, fake_data = loss_module(tensordict) assert not fake_data.requires_grad assert fake_data.shape == torch.Size([tensordict.numel(), imagination_horizon]) @@ -3326,7 +3326,7 @@ def _create_seq_mock_data_iql( @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("temperature", [0.0, 0.1, 1.0, 10.0]) @pytest.mark.parametrize("expectile", [0.1, 0.5, 1.0]) - @pytest.mark.parametrize("td_est", list(ValueFunctions) + [None]) + @pytest.mark.parametrize("td_est", list(ValueEstimators) + [None]) def test_iql( self, num_qvalue, @@ -3352,12 +3352,12 @@ def test_iql( expectile=expectile, loss_function="l2", ) - if td_est is ValueFunctions.GAE: + if td_est is ValueEstimators.GAE: with pytest.raises(NotImplementedError): - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) return if td_est is not None: - loss_fn.make_value_function(td_est) + loss_fn.make_value_estimator(td_est) with _check_td_steady(td): loss = loss_fn(td) @@ -4289,8 +4289,8 @@ class TestAdv: "adv,kwargs", [ [GAE, {"lmbda": 0.95}], - [TD1Estimate, {}], - [TDLambdaEstimate, {"lmbda": 0.95}], + [TD1Estimator, {}], + [TDLambdaEstimator, {"lmbda": 0.95}], ], ) def test_dispatch( @@ -4321,8 +4321,8 @@ def test_dispatch( "adv,kwargs", [ [GAE, {"lmbda": 0.95}], - [TD1Estimate, {}], - [TDLambdaEstimate, {"lmbda": 0.95}], + [TD1Estimator, {}], + [TDLambdaEstimator, {"lmbda": 0.95}], ], ) def test_diff_reward( @@ -4362,8 +4362,8 @@ def test_diff_reward( "adv,kwargs", [ [GAE, {"lmbda": 0.95}], - [TD1Estimate, {}], - [TDLambdaEstimate, {"lmbda": 0.95}], + [TD1Estimator, {}], + [TDLambdaEstimator, {"lmbda": 0.95}], ], ) def test_non_differentiable(self, adv, kwargs): diff --git a/test/test_modules.py b/test/test_modules.py index 78f4f61c39b..ab3ee0303c6 100644 --- a/test/test_modules.py +++ b/test/test_modules.py @@ -37,7 +37,7 @@ ) from torchrl.modules.models.utils import SquashDims from torchrl.modules.planners.mppi import MPPIPlanner -from torchrl.objectives.value import TDLambdaEstimate +from torchrl.objectives.value import TDLambdaEstimator @pytest.fixture @@ -477,7 +477,7 @@ def test_MPPI(self, device, batch_size, seed=1): env = MockBatchedUnLockedEnv(device=device) value_net = nn.LazyLinear(1, device=device) value_net = 
ValueOperator(value_net, in_keys=["observation"]) - advantage_module = TDLambdaEstimate( + advantage_module = TDLambdaEstimator( 0.99, 0.95, value_net, diff --git a/torchrl/modules/planners/mppi.py b/torchrl/modules/planners/mppi.py index 21fb53fae00..bd95e2de5c8 100644 --- a/torchrl/modules/planners/mppi.py +++ b/torchrl/modules/planners/mppi.py @@ -46,7 +46,7 @@ class MPPIPlanner(MPCPlannerBase): >>> from torchrl.data import CompositeSpec, NdUnboundedContinuousTensorSpec >>> from torchrl.envs.model_based import ModelBasedEnvBase >>> from torchrl.modules import TensorDictModule, ValueOperator - >>> from torchrl.objectives.value import TDLambdaEstimate + >>> from torchrl.objectives.value import TDLambdaEstimator >>> class MyMBEnv(ModelBasedEnvBase): ... def __init__(self, world_model, device="cpu", dtype=None, batch_size=None): ... super().__init__(world_model, device=device, dtype=dtype, batch_size=batch_size) @@ -87,7 +87,7 @@ class MPPIPlanner(MPCPlannerBase): >>> env = MyMBEnv(world_model) >>> value_net = nn.Linear(4, 1) >>> value_net = ValueOperator(value_net, in_keys=["hidden_observation"]) - >>> adv = TDLambdaEstimate( + >>> adv = TDLambdaEstimator( ... 0.99, ... 0.95, ... value_net, diff --git a/torchrl/objectives/__init__.py b/torchrl/objectives/__init__.py index e74ccbac808..70d794e6495 100644 --- a/torchrl/objectives/__init__.py +++ b/torchrl/objectives/__init__.py @@ -22,7 +22,7 @@ hold_out_params, next_state_value, SoftUpdate, - ValueFunctions, + ValueEstimators, ) # from .value import bellman_max, c_val, dv_val, vtrace, GAE, TDLambdaEstimate, TDEstimate diff --git a/torchrl/objectives/a2c.py b/torchrl/objectives/a2c.py index a8bdade55b6..af91ce86385 100644 --- a/torchrl/objectives/a2c.py +++ b/torchrl/objectives/a2c.py @@ -15,9 +15,9 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) -from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import GAE, TD0Estimator, TD1Estimator, TDLambdaEstimator class A2CLoss(LossModule): @@ -57,13 +57,13 @@ class A2CLoss(LossModule): If the advantage key (``"advantage`` by default) is not present in the input tensordict, the advantage will be computed by the :meth:`~.forward` method. - A custom advantage module can be built using :meth:`~.make_value_function`. + A custom advantage module can be built using :meth:`~.make_value_estimator`. The default is :class:`torchrl.objectives.value.GAE` with hyperparameters dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
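Editor's note: the make_value_estimator methods introduced throughout this patch all follow the same shape: merge default_value_kwargs(value_type) with the caller's overrides, then dispatch on the ValueEstimators enum member. A standalone sketch of that pattern follows; the helper name build_estimator is ours, not part of torchrl, and the toy value network shapes are illustrative only.

from torch import nn
from torchrl.modules import ValueOperator
from torchrl.objectives.utils import default_value_kwargs, ValueEstimators
from torchrl.objectives.value import GAE, TD0Estimator, TD1Estimator, TDLambdaEstimator

def build_estimator(value_type, value_network, **hyperparams):
    # defaults first (e.g. {"gamma": 0.99, "lmbda": 0.95} for TDLambda), then user overrides
    hp = dict(default_value_kwargs(value_type))
    hp.update(hyperparams)
    if value_type is ValueEstimators.TD1:
        return TD1Estimator(value_network=value_network, **hp)
    if value_type is ValueEstimators.TD0:
        return TD0Estimator(value_network=value_network, **hp)
    if value_type is ValueEstimators.GAE:
        return GAE(value_network=value_network, **hp)
    if value_type is ValueEstimators.TDLambda:
        return TDLambdaEstimator(value_network=value_network, **hp)
    raise NotImplementedError(f"Unknown value type {value_type}")

value_net = ValueOperator(nn.Linear(3, 1), in_keys=["observation"])
estimator = build_estimator(ValueEstimators.TDLambda, value_net, gamma=0.9)  # lmbda keeps its default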
""" - default_value_type: ValueFunctions = ValueFunctions.GAE + default_value_estimator: ValueEstimators = ValueEstimators.GAE def __init__( self, @@ -171,26 +171,26 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: td_out.set("loss_critic", loss_critic.mean()) return td_out - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) if hasattr(self, "gamma"): hp["gamma"] = self.gamma value_key = "state_value" - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: self._value_function = GAE( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=self.critic, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 9f1605ebb4d..b6dd85119e9 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -18,8 +18,8 @@ from torch.nn import Parameter from torchrl.modules.utils import Buffer -from torchrl.objectives.utils import ValueFunctions -from torchrl.objectives.value import ValueFunctionBase +from torchrl.objectives.utils import ValueEstimators +from torchrl.objectives.value import ValueEstimatorBase _has_functorch = False try: @@ -51,10 +51,10 @@ class LossModule(nn.Module): Losses that require a value estimation are equipped with a default value pointer. This class attribute indicates which value estimator will be used if none other is specified. - The value estimator can be changed using the :meth:`~.make_value_function` method. + The value estimator can be changed using the :meth:`~.make_value_estimator` method. """ - default_value_function: ValueFunctions = None + default_value_estimator: ValueEstimators = None def __init__(self): super().__init__() @@ -364,11 +364,11 @@ def cpu(self) -> LossModule: return self.to(torch.device("cpu")) @property - def value_function(self) -> ValueFunctionBase: + def value_function(self) -> ValueEstimatorBase: """The value function blends in the reward and value estimate(s) from upcoming state(s)/state-action pair(s) into a target value estimate for the value network.""" out = self._value_function if out is None: - self._default_value_function() + self._default_value_estimator() return self._value_function return out @@ -376,23 +376,23 @@ def value_function(self) -> ValueFunctionBase: def value_function(self, value): self._value_function = value - def _default_value_function(self): + def _default_value_estimator(self): """A value-function constructor when none is provided. No kwarg should be present as default parameters should be retrieved from :obj:`torchrl.objectives.utils.DEFAULT_VALUE_FUN_PARAMS`. 
""" - self.make_value_function(self.default_value_function) + self.make_value_estimator(self.default_value_estimator) - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): """Value-function constructor. If the non-default value function is wanted, it must be built using this method. Args: - value_type (ValueFunctions): A :class:`torchrl.objectives.utils.ValueFunctions` + value_type (ValueEstimators): A :class:`torchrl.objectives.utils.ValueFunctions` enum type indicating the value function to use. **hyperparams: hyperparameters to use for the value function. If not provided, the value indicated by @@ -402,24 +402,24 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): Examples: >>> # initialize the DQN loss >>> dqn_loss = DQNLoss(actor) - >>> dqn_loss.make_value_function( - ... ValueFunctions.TD1, + >>> dqn_loss.make_value_estimator( + ... ValueEstimators.TD1, ... gamma=0.9) """ - if value_type == ValueFunctions.TD1: + if value_type == ValueEstimators.TD1: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type == ValueFunctions.TD0: + elif value_type == ValueEstimators.TD0: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type == ValueFunctions.TDLambda: + elif value_type == ValueEstimators.TDLambda: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py index f8c6e83a621..f87a5cc5423 100644 --- a/torchrl/objectives/ddpg.py +++ b/torchrl/objectives/ddpg.py @@ -20,12 +20,12 @@ default_value_kwargs, distance_loss, hold_out_params, - ValueFunctions, + ValueEstimators, ) from ..envs.utils import set_exploration_mode from .common import LossModule -from .value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import TD0Estimator, TD1Estimator, TDLambdaEstimator class DDPGLoss(LossModule): @@ -41,7 +41,7 @@ class DDPGLoss(LossModule): data collection. Default is ``False``. """ - default_value_type: ValueFunctions = ValueFunctions.TD0 + default_value_estimator: ValueEstimators = ValueEstimators.TD0 def __init__( self, @@ -180,26 +180,26 @@ def _loss_value( return loss_value, (pred_val - target_value).pow(2), pred_val, target_value - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_action_value" - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=self.actor_critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=self.actor_critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." 
) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=self.actor_critic, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/deprecated.py b/torchrl/objectives/deprecated.py index 0a518bff304..17e7e346808 100644 --- a/torchrl/objectives/deprecated.py +++ b/torchrl/objectives/deprecated.py @@ -19,11 +19,11 @@ default_value_kwargs, distance_loss, hold_out_params, - ValueFunctions, + ValueEstimators, ) from torchrl.objectives.common import LossModule from torchrl.objectives.utils import _GAMMA_LMBDA_DEPREC_WARNING -from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator try: from functorch import vmap @@ -77,7 +77,7 @@ class REDQLoss_deprecated(LossModule): """ delay_actor: bool = False - default_value_type = ValueFunctions.TD0 + default_value_estimator = ValueEstimators.TD0 def __init__( self, @@ -281,27 +281,27 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: alpha_loss = torch.zeros_like(log_pi) return alpha_loss - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" # we do not need a value network bc the next state value is already passed - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=None, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=None, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." 
) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=None, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 01dbcc98182..77017064509 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -18,10 +18,10 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) -from .value import TDLambdaEstimate -from .value.advantages import TD0Estimate, TD1Estimate +from .value import TDLambdaEstimator +from .value.advantages import TD0Estimator, TD1Estimator class DQNLoss(LossModule): @@ -35,7 +35,7 @@ class DQNLoss(LossModule): """ - default_value_type = ValueFunctions.TDLambda + default_value_estimator = ValueEstimators.TDLambda def __init__( self, @@ -69,33 +69,33 @@ def __init__( warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) self.gamma = gamma - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) - if value_type is ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type is ValueEstimators.TD1: + self._value_function = TD1Estimator( **hp, value_network=self.value_network, advantage_key="advantage", value_target_key="value_target", value_key="chosen_action_value", ) - elif value_type is ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type is ValueEstimators.TD0: + self._value_function = TD0Estimator( **hp, value_network=self.value_network, advantage_key="advantage", value_target_key="value_target", value_key="chosen_action_value", ) - elif value_type is ValueFunctions.GAE: + elif value_type is ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type is ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type is ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( **hp, value_network=self.value_network, advantage_key="advantage", @@ -359,24 +359,24 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: loss_td = TensorDict({"loss": loss.mean()}, []) return loss_td - def make_value_function(self, value_type: ValueFunctions, **hyperparams): - if value_type is ValueFunctions.TD1: + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): + if value_type is ValueEstimators.TD1: raise NotImplementedError( f"value type {value_type} is not implemented for {self.__class__.__name__}." ) - elif value_type is ValueFunctions.TD0: + elif value_type is ValueEstimators.TD0: # see forward call pass - elif value_type is ValueFunctions.GAE: + elif value_type is ValueEstimators.GAE: raise NotImplementedError( f"value type {value_type} is not implemented for {self.__class__.__name__}." ) - elif value_type is ValueFunctions.TDLambda: + elif value_type is ValueEstimators.TDLambda: raise NotImplementedError( f"value type {value_type} is not implemented for {self.__class__.__name__}." 
) else: raise NotImplementedError(f"Unknown value type {value_type}") - def _default_value_function(self): - self.make_value_function(ValueFunctions.TD0) + def _default_value_estimator(self): + self.make_value_estimator(ValueEstimators.TD0) diff --git a/torchrl/objectives/dreamer.py b/torchrl/objectives/dreamer.py index 146686ed03c..03f215d0953 100644 --- a/torchrl/objectives/dreamer.py +++ b/torchrl/objectives/dreamer.py @@ -17,9 +17,9 @@ default_value_kwargs, distance_loss, hold_out_net, - ValueFunctions, + ValueEstimators, ) -from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator class DreamerModelLoss(LossModule): @@ -155,7 +155,7 @@ class DreamerActorLoss(LossModule): """ - default_value_type = ValueFunctions.TDLambda + default_value_estimator = ValueEstimators.TDLambda def __init__( self, @@ -230,37 +230,37 @@ def lambda_target(self, reward: torch.Tensor, value: torch.Tensor) -> torch.Tens ) return self.value_function.value_estimate(input_tensordict) - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_net = None value_key = "state_value" hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) - if value_type is ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type is ValueEstimators.TD1: + self._value_function = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type is ValueEstimators.TD0: + self._value_function = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.GAE: + elif value_type is ValueEstimators.GAE: if hasattr(self, "lmbda"): hp["lmbda"] = self.lmbda raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." 
) - elif value_type is ValueFunctions.TDLambda: + elif value_type is ValueEstimators.TDLambda: if hasattr(self, "lmbda"): hp["lmbda"] = self.lmbda - self._value_function = TDLambdaEstimate( + self._value_function = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py index 26d7c1a8c2f..ca1ea2a01a0 100644 --- a/torchrl/objectives/iql.py +++ b/torchrl/objectives/iql.py @@ -15,12 +15,12 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) from ..envs.utils import set_exploration_mode from .common import LossModule -from .value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import TD0Estimator, TD1Estimator, TDLambdaEstimator try: from functorch import vmap @@ -57,7 +57,7 @@ class IQLLoss(LossModule): """ - default_value_type = ValueFunctions.TD0 + default_value_estimator = ValueEstimators.TD0 def __init__( self, @@ -243,7 +243,7 @@ def _loss_qvalue(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: ) return loss_qval, td_error.detach().max(0)[0] - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_net = self.value_network value_key = "state_value" @@ -251,26 +251,26 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) - if value_type is ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type is ValueEstimators.TD1: + self._value_function = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type is ValueEstimators.TD0: + self._value_function = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.GAE: + elif value_type is ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type is ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type is ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py index 3bb97d9a371..c3b89b45bb6 100644 --- a/torchrl/objectives/ppo.py +++ b/torchrl/objectives/ppo.py @@ -15,11 +15,11 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) from .common import LossModule -from .value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import GAE, TD0Estimator, TD1Estimator, TDLambdaEstimator class PPOLoss(LossModule): @@ -80,18 +80,18 @@ class PPOLoss(LossModule): >>> advantage(data) >>> losses = ppo_loss(data) - A custom advantage module can be built using :meth:`~.make_value_function`. + A custom advantage module can be built using :meth:`~.make_value_estimator`. The default is :class:`torchrl.objectives.value.GAE` with hyperparameters dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
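Editor's note: as the docstring above describes, the advantage can be computed ahead of the loss call, in which case forward() reuses it. A toy sketch of that pre-computation; the observation size, the flat batch layout and the ("next", "reward") / ("next", "done") key convention are assumptions mirroring the rest of this patch.

import torch
from tensordict.tensordict import TensorDict
from torch import nn
from torchrl.modules import ValueOperator
from torchrl.objectives.value import GAE

value_net = ValueOperator(nn.Linear(3, 1), in_keys=["observation"])
adv_module = GAE(gamma=0.99, lmbda=0.95, value_network=value_net)

T = 10
data = TensorDict(
    {
        "observation": torch.randn(T, 3),
        "next": TensorDict(
            {
                "observation": torch.randn(T, 3),
                "reward": torch.randn(T, 1),
                "done": torch.zeros(T, 1, dtype=torch.bool),
            },
            [T],
        ),
    },
    [T],
)
adv_module(data)  # writes "advantage" and "value_target" into the tensordict
# the loss would now skip its own estimator call since "advantage" is already present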
>>> ppo_loss = PPOLoss(actor, critic) - >>> ppo_loss.make_value_function(ValueFunctions.TDLambda) + >>> ppo_loss.make_value_estimator(ValueEstimators.TDLambda) >>> data = next(datacollector) >>> losses = ppo_loss(data) """ - default_value_type = ValueFunctions.GAE + default_value_estimator = ValueEstimators.GAE def __init__( self, @@ -213,26 +213,26 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: td_out.set("loss_critic", loss_critic.mean()) return td_out - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: self._value_function = GAE( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=self.critic, value_key=value_key, **hp ) else: @@ -287,12 +287,12 @@ class ClipPPOLoss(PPOLoss): >>> advantage(data) >>> losses = ppo_loss(data) - A custom advantage module can be built using :meth:`~.make_value_function`. + A custom advantage module can be built using :meth:`~.make_value_estimator`. The default is :class:`torchrl.objectives.value.GAE` with hyperparameters dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. >>> ppo_loss = ClipPPOLoss(actor, critic) - >>> ppo_loss.make_value_function(ValueFunctions.TDLambda) + >>> ppo_loss.make_value_estimator(ValueEstimators.TDLambda) >>> data = next(datacollector) >>> losses = ppo_loss(data) @@ -442,12 +442,12 @@ class KLPENPPOLoss(PPOLoss): >>> advantage(data) >>> losses = ppo_loss(data) - A custom advantage module can be built using :meth:`~.make_value_function`. + A custom advantage module can be built using :meth:`~.make_value_estimator`. The default is :class:`torchrl.objectives.value.GAE` with hyperparameters dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
>>> ppo_loss = KLPENPPOLoss(actor, critic) - >>> ppo_loss.make_value_function(ValueFunctions.TDLambda) + >>> ppo_loss.make_value_estimator(ValueEstimators.TDLambda) >>> data = next(datacollector) >>> losses = ppo_loss(data) diff --git a/torchrl/objectives/redq.py b/torchrl/objectives/redq.py index fe717c77cd7..dc7b146b142 100644 --- a/torchrl/objectives/redq.py +++ b/torchrl/objectives/redq.py @@ -20,9 +20,9 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) -from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator try: from functorch import vmap @@ -76,7 +76,7 @@ class REDQLoss(LossModule): """ delay_actor: bool = False - default_value_type = ValueFunctions.TD0 + default_value_estimator = ValueEstimators.TD0 def __init__( self, @@ -322,27 +322,27 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: alpha_loss = torch.zeros_like(log_pi) return alpha_loss - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" # we do not need a value network bc the next state value is already passed - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=None, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=None, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=None, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/reinforce.py b/torchrl/objectives/reinforce.py index 2c972291455..98384444298 100644 --- a/torchrl/objectives/reinforce.py +++ b/torchrl/objectives/reinforce.py @@ -14,9 +14,9 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) -from torchrl.objectives.value import GAE, TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import GAE, TD0Estimator, TD1Estimator, TDLambdaEstimator class ReinforceLoss(LossModule): @@ -55,18 +55,18 @@ class ReinforceLoss(LossModule): >>> advantage(data) >>> losses = reinforce_loss(data) - A custom advantage module can be built using :meth:`~.make_value_function`. + A custom advantage module can be built using :meth:`~.make_value_estimator`. The default is :class:`torchrl.objectives.value.GAE` with hyperparameters dictated by :func:`torchrl.objectives.utils.default_value_kwargs`. 
>>> reinforce_loss = ReinforceLoss(actor, critic) - >>> reinforce_loss.make_value_function(ValueFunctions.TDLambda) + >>> reinforce_loss.make_value_estimator(ValueEstimators.TDLambda) >>> data = next(datacollector) >>> losses = reinforce_loss(data) """ - default_value_type = ValueFunctions.GAE + default_value_estimator = ValueEstimators.GAE def __init__( self, @@ -153,26 +153,26 @@ def loss_critic(self, tensordict: TensorDictBase) -> torch.Tensor: ) return loss_value - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_value" - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: self._value_function = GAE( value_network=self.critic, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=self.critic, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index f24d1f13b3c..3d82d141af6 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -19,12 +19,12 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) from ..envs.utils import set_exploration_mode, step_mdp from .common import LossModule -from .value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from .value import TD0Estimator, TD1Estimator, TDLambdaEstimator try: from functorch import vmap @@ -83,7 +83,7 @@ class SACLoss(LossModule): Default is ``False``. """ - default_value_type = ValueFunctions.TD0 + default_value_estimator = ValueEstimators.TD0 def __init__( self, @@ -189,7 +189,7 @@ def __init__( warnings.warn(_GAMMA_LMBDA_DEPREC_WARNING) self.gamma = gamma - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): if self._version == 1: value_net = self.actor_critic elif self._version == 2: @@ -202,26 +202,26 @@ def make_value_function(self, value_type: ValueFunctions, **hyperparams): value_key = "state_value" hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) - if value_type is ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type is ValueEstimators.TD1: + self._value_function = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type is ValueEstimators.TD0: + self._value_function = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.GAE: + elif value_type is ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." 
) - elif value_type is ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type is ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", @@ -495,7 +495,7 @@ class DiscreteSACLoss(LossModule): """ - default_value_type = ValueFunctions.TD0 + default_value_estimator = ValueEstimators.TD0 delay_actor: bool = False def __init__( @@ -729,33 +729,33 @@ def _loss_alpha(self, log_pi: Tensor) -> Tensor: alpha_loss = torch.zeros_like(log_pi) return alpha_loss - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_net = None value_key = "state_value" hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) if hasattr(self, "gamma"): hp["gamma"] = self.gamma - if value_type is ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type is ValueEstimators.TD1: + self._value_function = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type is ValueEstimators.TD0: + self._value_function = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) - elif value_type is ValueFunctions.GAE: + elif value_type is ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type is ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type is ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", diff --git a/torchrl/objectives/td3.py b/torchrl/objectives/td3.py index 1ef911a8891..1f8ed97e37f 100644 --- a/torchrl/objectives/td3.py +++ b/torchrl/objectives/td3.py @@ -15,9 +15,9 @@ _GAMMA_LMBDA_DEPREC_WARNING, default_value_kwargs, distance_loss, - ValueFunctions, + ValueEstimators, ) -from torchrl.objectives.value import TD0Estimate, TD1Estimate, TDLambdaEstimate +from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator try: from functorch import vmap @@ -56,7 +56,7 @@ class TD3Loss(LossModule): for data collection. Default is ``False``. 
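Editor's note: SAC, REDQ and TD3 above pass ``value_network=None`` because they compute the next-state value themselves and only need the estimator to fold reward, done and gamma into a target. A sketch of that flow; the key names and shapes are assumptions taken from the surrounding diffs rather than a verified API contract.

import torch
from tensordict.tensordict import TensorDict
from torchrl.objectives.value import TD0Estimator

estimator = TD0Estimator(gamma=0.99, value_network=None, value_key="state_value")

T = 4
data = TensorDict(
    {
        "next": TensorDict(
            {
                "state_value": torch.randn(T, 1),  # pre-computed by the loss (e.g. a minimum over Q-networks)
                "reward": torch.randn(T, 1),
                "done": torch.zeros(T, 1, dtype=torch.bool),
            },
            [T],
        )
    },
    [T],
)
target = estimator.value_estimate(data)  # TD(0) target: reward + gamma * (1 - done) * next state value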
""" - default_value_function = ValueFunctions.TD0 + default_value_estimator = ValueEstimators.TD0 def __init__( self, @@ -229,27 +229,27 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: return td_out - def make_value_function(self, value_type: ValueFunctions, **hyperparams): + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) if hasattr(self, "gamma"): hp["gamma"] = self.gamma hp.update(hyperparams) value_key = "state_action_value" # we do not need a value network bc the next state value is already passed - if value_type == ValueFunctions.TD1: - self._value_function = TD1Estimate( + if value_type == ValueEstimators.TD1: + self._value_function = TD1Estimator( value_network=None, value_key=value_key, **hp ) - elif value_type == ValueFunctions.TD0: - self._value_function = TD0Estimate( + elif value_type == ValueEstimators.TD0: + self._value_function = TD0Estimator( value_network=None, value_key=value_key, **hp ) - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: raise NotImplementedError( f"Value type {value_type} it not implemented for loss {type(self)}." ) - elif value_type == ValueFunctions.TDLambda: - self._value_function = TDLambdaEstimate( + elif value_type == ValueEstimators.TDLambda: + self._value_function = TDLambdaEstimator( value_network=None, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 09edf449e08..63891c0fe27 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -18,11 +18,11 @@ _GAMMA_LMBDA_DEPREC_WARNING = ( "Passing gamma / lambda parameters through the loss constructor " "is deprecated and will be removed soon. To customize your value function, " - "run `loss_module.make_value_function(ValueFunctions., gamma=val)`." + "run `loss_module.make_value_estimator(ValueFunctions., gamma=val)`." ) -class ValueFunctions(Enum): +class ValueEstimators(Enum): """Value function enumerator for custom-built estimators. Allows for a flexible usage of various value functions when the loss module @@ -30,7 +30,7 @@ class ValueFunctions(Enum): Examples: >>> dqn_loss = DQNLoss(actor) - >>> dqn_loss.make_value_function(ValueFunctions.TD0, gamma=0.9) + >>> dqn_loss.make_value_estimator(ValueEstimators.TD0, gamma=0.9) """ @@ -40,7 +40,7 @@ class ValueFunctions(Enum): GAE = 4 -def default_value_kwargs(value_type: ValueFunctions): +def default_value_kwargs(value_type: ValueEstimators): """Default value function keyword argument generator. Args: @@ -48,17 +48,17 @@ def default_value_kwargs(value_type: ValueFunctions): :class:`torchrl.objectives.utils.ValueFunctions` class. 
Examples: - >>> kwargs = default_value_kwargs(ValueFunctions.TDLambda) + >>> kwargs = default_value_kwargs(ValueEstimators.TDLambda) {"gamma": 0.99, "lmbda": 0.95} """ - if value_type == ValueFunctions.TD1: + if value_type == ValueEstimators.TD1: return {"gamma": 0.99} - elif value_type == ValueFunctions.TD0: + elif value_type == ValueEstimators.TD0: return {"gamma": 0.99} - elif value_type == ValueFunctions.GAE: + elif value_type == ValueEstimators.GAE: return {"gamma": 0.99, "lmbda": 0.95} - elif value_type == ValueFunctions.TDLambda: + elif value_type == ValueEstimators.TDLambda: return {"gamma": 0.99, "lmbda": 0.95} else: raise NotImplementedError(f"Unknown value type {value_type}.") diff --git a/torchrl/objectives/value/__init__.py b/torchrl/objectives/value/__init__.py index ef224940ddf..11ae2e6d9e2 100644 --- a/torchrl/objectives/value/__init__.py +++ b/torchrl/objectives/value/__init__.py @@ -6,7 +6,10 @@ from .advantages import ( GAE, TD0Estimate, + TD0Estimator, TD1Estimate, + TD1Estimator, TDLambdaEstimate, - ValueFunctionBase, + TDLambdaEstimator, + ValueEstimatorBase, ) diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 6695fbc2488..a8f681c2163 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -3,6 +3,8 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. import abc +import warnings +from copy import deepcopy from functools import wraps from typing import Callable, List, Optional, Tuple, Union @@ -31,7 +33,7 @@ def new_fun(self, *args, **kwargs): return new_fun -class ValueFunctionBase(nn.Module): +class ValueEstimatorBase(nn.Module): """An abstract parent class for value function modules. Its :meth:`ValueFunctionBase.forward` method will compute the value (given @@ -111,7 +113,7 @@ def is_stateless(self): return self.value_network._is_stateless -class TD0Estimate(ValueFunctionBase): +class TD0Estimator(ValueEstimatorBase): """Myopic Temporal Difference (TD(0)) estimate of advantage function. Args: @@ -293,7 +295,7 @@ def value_estimate( return value_target -class TD1Estimate(ValueFunctionBase): +class TD1Estimator(ValueEstimatorBase): """Bootstrapped Temporal Difference (TD(1)) estimate of advantage function. Args: @@ -475,7 +477,7 @@ def value_estimate( return value_target -class TDLambdaEstimate(ValueFunctionBase): +class TDLambdaEstimator(ValueEstimatorBase): r"""TD(:math:`\lambda`) estimate of advantage function. Args: @@ -577,7 +579,7 @@ def forward( >>> value_net = TensorDictModule( ... nn.Linear(3, 1), in_keys=["obs"], out_keys=["state_value"] ... ) - >>> module = TDLambdaEstimate( + >>> module = TDLambdaEstimator( ... gamma=0.98, ... lmbda=0.94, ... value_network=value_net, @@ -596,7 +598,7 @@ def forward( >>> value_net = TensorDictModule( ... nn.Linear(3, 1), in_keys=["obs"], out_keys=["state_value"] ... ) - >>> module = TDLambdaEstimate( + >>> module = TDLambdaEstimator( ... gamma=0.98, ... lmbda=0.94, ... value_network=value_net, @@ -672,7 +674,7 @@ def value_estimate( return val -class GAE(ValueFunctionBase): +class GAE(ValueEstimatorBase): """A class wrapper around the generalized advantage estimate functional. 
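Editor's note: the recursion behind this wrapper and its functional counterparts can be written as a plain-PyTorch loop. This is only a readability aid (torchrl's implementations also handle tensor-valued gammas, rolling_gamma and vectorized execution), not the library code itself.

import torch

def gae_reference(gamma, lmbda, state_value, next_state_value, reward, done):
    # delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
    # A_t     = delta_t + gamma * lmbda * (1 - done_t) * A_{t+1}
    not_done = (~done).float()
    delta = reward + gamma * not_done * next_state_value - state_value
    advantage = torch.zeros_like(delta)
    running = torch.zeros_like(delta[..., -1, :])
    for t in reversed(range(delta.shape[-2])):
        running = delta[..., t, :] + gamma * lmbda * not_done[..., t, :] * running
        advantage[..., t, :] = running
    value_target = advantage + state_value
    return advantage, value_target

T = 5
adv, target = gae_reference(
    0.99,
    0.95,
    torch.randn(T, 1),                    # state_value
    torch.randn(T, 1),                    # next_state_value
    torch.randn(T, 1),                    # reward
    torch.zeros(T, 1, dtype=torch.bool),  # done
)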
Refer to "HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION" @@ -914,3 +916,20 @@ def value_estimate( gamma, lmbda, value, next_value, reward, done ) return value_target + + +def _deprecate_class(cls, new_cls): + @wraps(cls.__init__) + def new_init(self, *args, **kwargs): + warnings.warn(f"class {cls} is deprecated, please use {new_cls} instead.") + cls.__init__(self, *args, **kwargs) + + cls.__init__ = new_init + + +TD0Estimate = deepcopy(TD0Estimator) +_deprecate_class(TD0Estimate, TD0Estimator) +TD1Estimate = deepcopy(TD1Estimator) +_deprecate_class(TD1Estimate, TD1Estimator) +TDLambdaEstimate = deepcopy(TDLambdaEstimator) +_deprecate_class(TDLambdaEstimate, TDLambdaEstimator) From f00803445385dc200ad3f237bf427df6022eb1fd Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 13:57:05 +0100 Subject: [PATCH 23/89] amend --- docs/source/reference/objectives.rst | 15 +- test/test_cost.py | 176 +++++++++- test/test_modules.py | 6 +- torchrl/objectives/a2c.py | 10 +- torchrl/objectives/common.py | 14 +- torchrl/objectives/ddpg.py | 8 +- torchrl/objectives/deprecated.py | 8 +- torchrl/objectives/dqn.py | 10 +- torchrl/objectives/dreamer.py | 10 +- torchrl/objectives/iql.py | 8 +- torchrl/objectives/ppo.py | 14 +- torchrl/objectives/redq.py | 10 +- torchrl/objectives/reinforce.py | 10 +- torchrl/objectives/sac.py | 24 +- torchrl/objectives/td3.py | 8 +- torchrl/objectives/utils.py | 8 +- torchrl/objectives/value/advantages.py | 19 +- torchrl/objectives/value/functional.py | 436 ++++++++++++++++++++----- 18 files changed, 632 insertions(+), 162 deletions(-) diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index 01a7379e298..ba91adc2f5e 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -188,13 +188,18 @@ Returns TD1Estimator TDLambdaEstimator GAE - functional.generalized_advantage_estimate - functional.vec_generalized_advantage_estimate - functional.vec_td_lambda_return_estimate - functional.vec_td_lambda_advantage_estimate + functional.td0_return_estimate + functional.td0_advantage_estimate + functional.td1_return_estimate + functional.vec_td1_return_estimate + functional.td1_advantage_estimate + functional.vec_td1_advantage_estimate functional.td_lambda_return_estimate + functional.vec_td_lambda_return_estimate functional.td_lambda_advantage_estimate - functional.td_advantage_estimate + functional.vec_td_lambda_advantage_estimate + functional.generalized_advantage_estimate + functional.vec_generalized_advantage_estimate Utils diff --git a/test/test_cost.py b/test/test_cost.py index 0aa87bcb2c4..7ebb33a6771 100644 --- a/test/test_cost.py +++ b/test/test_cost.py @@ -96,9 +96,11 @@ from torchrl.objectives.value.advantages import GAE, TD1Estimator, TDLambdaEstimator from torchrl.objectives.value.functional import ( generalized_advantage_estimate, - td_advantage_estimate, + td0_advantage_estimate, + td1_advantage_estimate, td_lambda_advantage_estimate, vec_generalized_advantage_estimate, + vec_td1_advantage_estimate, vec_td_lambda_advantage_estimate, ) from torchrl.objectives.value.utils import _custom_conv1d, _make_gammas_tensor @@ -3727,6 +3729,30 @@ def test_tdlambda(self, device, gamma, lmbda, N, T, random_gamma, rolling_gamma) ) torch.testing.assert_close(r1, r2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) + @pytest.mark.parametrize("gamma", [0.1, 0.5, 0.99]) + @pytest.mark.parametrize("N", [(3,), (7, 3)]) + @pytest.mark.parametrize("T", 
[3, 5, 200]) + # @pytest.mark.parametrize("random_gamma,rolling_gamma", [[True, False], [True, True], [False, None]]) + @pytest.mark.parametrize("random_gamma,rolling_gamma", [[False, None]]) + def test_td1(self, device, gamma, N, T, random_gamma, rolling_gamma): + torch.manual_seed(0) + + done = torch.zeros(*N, T, 1, device=device, dtype=torch.bool).bernoulli_(0.1) + reward = torch.randn(*N, T, 1, device=device) + state_value = torch.randn(*N, T, 1, device=device) + next_state_value = torch.randn(*N, T, 1, device=device) + if random_gamma: + gamma = torch.rand_like(reward) * gamma + + r1 = vec_td1_advantage_estimate( + gamma, state_value, next_state_value, reward, done, rolling_gamma + ) + r2 = td1_advantage_estimate( + gamma, state_value, next_state_value, reward, done, rolling_gamma + ) + torch.testing.assert_close(r1, r2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("gamma", [0.99, 0.5, 0.1]) @pytest.mark.parametrize("lmbda", [0.99, 0.5, 0.1]) @@ -3796,6 +3822,49 @@ def test_tdlambda_tensor_gamma(self, device, gamma, lmbda, N, T, has_done): torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) + @pytest.mark.parametrize("gamma", [0.5, 0.99, 0.1]) + @pytest.mark.parametrize("N", [(3,), (7, 3)]) + @pytest.mark.parametrize("T", [3, 5, 200]) + @pytest.mark.parametrize("has_done", [True, False]) + def test_td1_tensor_gamma(self, device, gamma, N, T, has_done): + """Tests vec_td_lambda_advantage_estimate against itself with + gamma being a tensor or a scalar + + """ + torch.manual_seed(0) + + done = torch.zeros(*N, T, 1, device=device, dtype=torch.bool) + if has_done: + done = done.bernoulli_(0.1) + reward = torch.randn(*N, T, 1, device=device) + state_value = torch.randn(*N, T, 1, device=device) + next_state_value = torch.randn(*N, T, 1, device=device) + + gamma_tensor = torch.full((*N, T, 1), gamma, device=device) + + v1 = vec_td1_advantage_estimate( + gamma, state_value, next_state_value, reward, done + ) + v2 = vec_td1_advantage_estimate( + gamma_tensor, state_value, next_state_value, reward, done + ) + + torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + + # # same with last done being true + done[..., -1, :] = True # terminating trajectory + gamma_tensor[..., -1, :] = 0.0 + + v1 = vec_td1_advantage_estimate( + gamma, state_value, next_state_value, reward, done + ) + v2 = vec_td1_advantage_estimate( + gamma_tensor, state_value, next_state_value, reward, done + ) + + torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("gamma", [0.5, 0.99, 0.1]) @pytest.mark.parametrize("lmbda", [0.1, 0.5, 0.99]) @@ -3843,6 +3912,48 @@ def test_vectdlambda_tensor_gamma( torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) + @pytest.mark.parametrize("gamma", [0.5, 0.99, 0.1]) + @pytest.mark.parametrize("N", [(3,), (7, 3)]) + @pytest.mark.parametrize("T", [3, 5, 50]) + @pytest.mark.parametrize("has_done", [True, False]) + def test_vectd1_tensor_gamma( + self, device, gamma, N, T, dtype_fixture, has_done # noqa + ): + """Tests td_lambda_advantage_estimate against vec_td_lambda_advantage_estimate + with gamma being a tensor or a scalar + + """ + + torch.manual_seed(0) + + done = torch.zeros(*N, T, 1, device=device, dtype=torch.bool) + if has_done: + done = done.bernoulli_(0.1) + reward = torch.randn(*N, T, 1, 
device=device) + state_value = torch.randn(*N, T, 1, device=device) + next_state_value = torch.randn(*N, T, 1, device=device) + + gamma_tensor = torch.full((*N, T, 1), gamma, device=device) + + v1 = td1_advantage_estimate(gamma, state_value, next_state_value, reward, done) + v2 = vec_td1_advantage_estimate( + gamma_tensor, state_value, next_state_value, reward, done + ) + + torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + + # same with last done being true + done[..., -1, :] = True # terminating trajectory + gamma_tensor[..., -1, :] = 0.0 + + v1 = td1_advantage_estimate(gamma, state_value, next_state_value, reward, done) + v2 = vec_td1_advantage_estimate( + gamma_tensor, state_value, next_state_value, reward, done + ) + + torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("lmbda", [0.1, 0.5, 0.99]) @pytest.mark.parametrize("N", [(3,), (7, 3)]) @@ -3904,6 +4015,63 @@ def test_vectdlambda_rand_gamma( ) torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) + @pytest.mark.parametrize("N", [(3,), (7, 3)]) + @pytest.mark.parametrize("T", [50, 3]) + @pytest.mark.parametrize("rolling_gamma", [True, False, None]) + @pytest.mark.parametrize("has_done", [True, False]) + @pytest.mark.parametrize("seed", range(1)) + def test_vectd1_rand_gamma( + self, device, N, T, rolling_gamma, dtype_fixture, has_done, seed # noqa + ): + """Tests td_lambda_advantage_estimate against vec_td_lambda_advantage_estimate + with gamma being a random tensor + + """ + torch.manual_seed(seed) + + done = torch.zeros(*N, T, 1, device=device, dtype=torch.bool) + if has_done: + done = done.bernoulli_(0.1) + reward = torch.randn(*N, T, 1, device=device) + state_value = torch.randn(*N, T, 1, device=device) + next_state_value = torch.randn(*N, T, 1, device=device) + + # avoid low values of gamma + gamma_tensor = 0.5 + torch.rand_like(next_state_value) / 2 + + v1 = td1_advantage_estimate( + gamma_tensor, + state_value, + next_state_value, + reward, + done, + rolling_gamma, + ) + if rolling_gamma is False and not done[..., 1:, :][done[..., :-1, :]].all(): + # if a not-done follows a done, then rolling_gamma=False cannot be used + with pytest.raises( + NotImplementedError, match="When using rolling_gamma=False" + ): + vec_td1_advantage_estimate( + gamma_tensor, + state_value, + next_state_value, + reward, + done, + rolling_gamma, + ) + return + v2 = vec_td1_advantage_estimate( + gamma_tensor, + state_value, + next_state_value, + reward, + done, + rolling_gamma, + ) + torch.testing.assert_close(v1, v2, rtol=1e-4, atol=1e-4) + @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("gamma", [0.99, "rand"]) @pytest.mark.parametrize("N", [(3,), (3, 7)]) @@ -4066,21 +4234,21 @@ def test_successive_traj_tdadv( # avoid low values of gamma gamma_tensor = 0.5 + torch.rand_like(next_state_value) / 2 - v1 = td_advantage_estimate( + v1 = td0_advantage_estimate( gamma_tensor, state_value, next_state_value, reward, done, ) - v1a = td_advantage_estimate( + v1a = td0_advantage_estimate( gamma_tensor[..., : T // 2, :], state_value[..., : T // 2, :], next_state_value[..., : T // 2, :], reward[..., : T // 2, :], done[..., : T // 2, :], ) - v1b = td_advantage_estimate( + v1b = td0_advantage_estimate( gamma_tensor[..., T // 2 :, :], state_value[..., T // 2 :, :], next_state_value[..., T // 2 :, :], diff --git a/test/test_modules.py b/test/test_modules.py index 
ab3ee0303c6..bd16a1097aa 100644 --- a/test/test_modules.py +++ b/test/test_modules.py @@ -478,9 +478,9 @@ def test_MPPI(self, device, batch_size, seed=1): value_net = nn.LazyLinear(1, device=device) value_net = ValueOperator(value_net, in_keys=["observation"]) advantage_module = TDLambdaEstimator( - 0.99, - 0.95, - value_net, + gamma=0.99, + lmbda=0.95, + value_network=value_net, ) value_net(env.reset()) planner = MPPIPlanner( diff --git a/torchrl/objectives/a2c.py b/torchrl/objectives/a2c.py index af91ce86385..4b7c40c56c2 100644 --- a/torchrl/objectives/a2c.py +++ b/torchrl/objectives/a2c.py @@ -153,7 +153,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict = tensordict.clone(False) advantage = tensordict.get(self.advantage_key, None) if advantage is None: - self.value_function( + self.value_estimator( tensordict, params=self.critic_params, target_params=self.target_critic_params, @@ -178,19 +178,19 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp["gamma"] = self.gamma value_key = "state_value" if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: - self._value_function = GAE( + self._value_estimator = GAE( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=self.critic, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index b6dd85119e9..770d3f3e406 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -59,7 +59,7 @@ class LossModule(nn.Module): def __init__(self): super().__init__() self._param_maps = {} - self._value_function = None + self._value_estimator = None # self.register_forward_pre_hook(_parameters_to_tensordict) def forward(self, tensordict: TensorDictBase) -> TensorDictBase: @@ -364,17 +364,17 @@ def cpu(self) -> LossModule: return self.to(torch.device("cpu")) @property - def value_function(self) -> ValueEstimatorBase: + def value_estimator(self) -> ValueEstimatorBase: """The value function blends in the reward and value estimate(s) from upcoming state(s)/state-action pair(s) into a target value estimate for the value network.""" - out = self._value_function + out = self._value_estimator if out is None: self._default_value_estimator() - return self._value_function + return self._value_estimator return out - @value_function.setter - def value_function(self, value): - self._value_function = value + @value_estimator.setter + def value_estimator(self, value): + self._value_estimator = value def _default_value_estimator(self): """A value-function constructor when none is provided. 
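Editor's note: the property/constructor pair added to LossModule here boils down to a lazy default. Below is a stripped-down, torchrl-free sketch of that mechanism; MiniLoss and the string value types are stand-ins for the real classes and enum members.

class MiniLoss:
    default_value_estimator = "TD0"  # stands in for ValueEstimators.TD0

    def __init__(self):
        self._value_estimator = None

    @property
    def value_estimator(self):
        # first access builds the class-level default
        if self._value_estimator is None:
            self._default_value_estimator()
        return self._value_estimator

    @value_estimator.setter
    def value_estimator(self, value):
        self._value_estimator = value

    def _default_value_estimator(self):
        self.make_value_estimator(self.default_value_estimator)

    def make_value_estimator(self, value_type, **hyperparams):
        # a real loss builds a TD0Estimator / TDLambdaEstimator / GAE instance here
        self._value_estimator = (value_type, hyperparams)

loss = MiniLoss()
assert loss.value_estimator == ("TD0", {})  # lazily built default
loss.make_value_estimator("TDLambda", gamma=0.9, lmbda=0.95)
assert loss.value_estimator == ("TDLambda", {"gamma": 0.9, "lmbda": 0.95})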
diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py index f87a5cc5423..c1cacd7349e 100644 --- a/torchrl/objectives/ddpg.py +++ b/torchrl/objectives/ddpg.py @@ -169,7 +169,7 @@ def _loss_value( device=self.target_actor_network_params.device, ) with set_exploration_mode("mode"): - target_value = self.value_function.value_estimate( + target_value = self.value_estimator.value_estimate( tensordict, target_params=target_params ).squeeze(-1) @@ -187,11 +187,11 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp.update(hyperparams) value_key = "state_action_value" if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=self.actor_critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=self.actor_critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: @@ -199,7 +199,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=self.actor_critic, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/deprecated.py b/torchrl/objectives/deprecated.py index 17e7e346808..9116a29e59c 100644 --- a/torchrl/objectives/deprecated.py +++ b/torchrl/objectives/deprecated.py @@ -253,7 +253,7 @@ def _qvalue_loss(self, tensordict: TensorDictBase) -> Tensor: next_state_value = next_state_value.min(0)[0] tensordict.set(("next", "state_value"), next_state_value) - target_value = self.value_function.value_estimate(tensordict).squeeze(-1) + target_value = self.value_estimator.value_estimate(tensordict).squeeze(-1) tensordict_expand = vmap(self.qvalue_network, (None, 0))( tensordict.select(*self.qvalue_network.in_keys), self.qvalue_network_params, @@ -289,11 +289,11 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_key = "state_value" # we do not need a value network bc the next state value is already passed if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=None, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=None, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: @@ -301,7 +301,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." 
) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=None, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index 77017064509..e584b894ed7 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -75,7 +75,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp["gamma"] = self.gamma hp.update(hyperparams) if value_type is ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( **hp, value_network=self.value_network, advantage_key="advantage", @@ -83,7 +83,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_key="chosen_action_value", ) elif value_type is ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( **hp, value_network=self.value_network, advantage_key="advantage", @@ -95,7 +95,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type is ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( **hp, value_network=self.value_network, advantage_key="advantage", @@ -155,7 +155,7 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: action = action.to(torch.float) pred_val_index = (pred_val * action).sum(-1) - target_value = self.value_function.value_estimate( + target_value = self.value_estimator.value_estimate( tensordict.clone(False), target_params=self.target_value_network_params ).squeeze(-1) @@ -191,7 +191,7 @@ class DistributionalDQNLoss(LossModule): gamma (scalar): a discount factor for return computation. .. note:: Unlike :class:`DQNLoss`, this class does not currently support - custom value functions. The next value estimation is not + custom value functions. The next value estimation is always bootstrapped. 
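Editor's note: the bootstrapping mentioned in the note above is the one-step TD(0) target; with toy numbers (shapes illustrative only):

import torch

gamma = 0.99
reward = torch.tensor([[1.0], [0.0]])
done = torch.tensor([[False], [True]])
next_value = torch.tensor([[2.0], [5.0]])
# terminal transitions (done=True) drop the bootstrap term entirely
target = reward + gamma * (~done).float() * next_value  # -> [[2.9800], [0.0000]]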
delay_value (bool): whether to duplicate the value network into a new target value network to create double DQN diff --git a/torchrl/objectives/dreamer.py b/torchrl/objectives/dreamer.py index 03f215d0953..cfad2825a4e 100644 --- a/torchrl/objectives/dreamer.py +++ b/torchrl/objectives/dreamer.py @@ -208,7 +208,7 @@ def forward(self, tensordict: TensorDict) -> Tuple[TensorDict, TensorDict]: fake_data.set("lambda_target", lambda_target) if self.discount_loss: - gamma = self.value_function.gamma.to(tensordict.device) + gamma = self.value_estimator.gamma.to(tensordict.device) discount = gamma.expand(lambda_target.shape) discount[..., 0, :] = 1 discount = discount.cumprod(dim=-2) @@ -228,7 +228,7 @@ def lambda_target(self, reward: torch.Tensor, value: torch.Tensor) -> torch.Tens }, [], ) - return self.value_function.value_estimate(input_tensordict) + return self.value_estimator.value_estimate(input_tensordict) def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_net = None @@ -238,14 +238,14 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp["gamma"] = self.gamma hp.update(hyperparams) if value_type is ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) elif value_type is ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", @@ -260,7 +260,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): elif value_type is ValueEstimators.TDLambda: if hasattr(self, "lmbda"): hp["lmbda"] = self.lmbda - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py index ca1ea2a01a0..4e993ef5579 100644 --- a/torchrl/objectives/iql.py +++ b/torchrl/objectives/iql.py @@ -223,7 +223,7 @@ def _loss_qvalue(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: obs_keys = self.actor_network.in_keys tensordict = tensordict.select("next", *obs_keys, "action") - target_value = self.value_function.value_estimate( + target_value = self.value_estimator.value_estimate( tensordict, target_params=self.target_value_network_params ).squeeze(-1) tensordict_expand = vmap(self.qvalue_network, (None, 0))( @@ -252,14 +252,14 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp["gamma"] = self.gamma hp.update(hyperparams) if value_type is ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) elif value_type is ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", @@ -270,7 +270,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." 
) elif value_type is ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py index c3b89b45bb6..638174d21a6 100644 --- a/torchrl/objectives/ppo.py +++ b/torchrl/objectives/ppo.py @@ -190,7 +190,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict = tensordict.clone(False) advantage = tensordict.get(self.advantage_key, None) if advantage is None: - self.value_function( + self.value_estimator( tensordict, params=self.critic_params, target_params=self.target_critic_params, @@ -220,19 +220,19 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp.update(hyperparams) value_key = "state_value" if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: - self._value_function = GAE( + self._value_estimator = GAE( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=self.critic, value_key=value_key, **hp ) else: @@ -340,7 +340,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict = tensordict.clone(False) advantage = tensordict.get(self.advantage_key, None) if advantage is None: - self.value_function( + self.value_estimator( tensordict, params=self.critic_params, target_params=self.target_critic_params, @@ -507,7 +507,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDict: tensordict = tensordict.clone(False) advantage = tensordict.get(self.advantage_key, None) if advantage is None: - self.value_function( + self.value_estimator( tensordict, params=self.critic_params, target_params=self.target_critic_params, diff --git a/torchrl/objectives/redq.py b/torchrl/objectives/redq.py index dc7b146b142..417b6d90fcf 100644 --- a/torchrl/objectives/redq.py +++ b/torchrl/objectives/redq.py @@ -275,7 +275,9 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: next_state_value = next_state_value.min(0)[0] tensordict_select.set(("next", "state_value"), next_state_value.unsqueeze(-1)) - target_value = self.value_function.value_estimate(tensordict_select).squeeze(-1) + target_value = self.value_estimator.value_estimate(tensordict_select).squeeze( + -1 + ) pred_val = state_action_value_qvalue td_error = (pred_val - target_value).pow(2) @@ -330,11 +332,11 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_key = "state_value" # we do not need a value network bc the next state value is already passed if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=None, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=None, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: @@ -342,7 +344,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." 
) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=None, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/reinforce.py b/torchrl/objectives/reinforce.py index 98384444298..baa0a4c2ae8 100644 --- a/torchrl/objectives/reinforce.py +++ b/torchrl/objectives/reinforce.py @@ -108,7 +108,7 @@ def __init__( def forward(self, tensordict: TensorDictBase) -> TensorDictBase: advantage = tensordict.get(self.advantage_key, None) if advantage is None: - self.value_function( + self.value_estimator( tensordict, params=self.critic_params, target_params=self.target_critic_params, @@ -160,19 +160,19 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp.update(hyperparams) value_key = "state_value" if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: - self._value_function = GAE( + self._value_estimator = GAE( value_network=self.critic, value_key=value_key, **hp ) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=self.critic, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 3d82d141af6..8177c2f393c 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -203,14 +203,14 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): hp = dict(default_value_kwargs(value_type)) hp.update(hyperparams) if value_type is ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) elif value_type is ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", @@ -221,7 +221,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." 
) elif value_type is ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", @@ -317,7 +317,7 @@ def _loss_qvalue_v1(self, tensordict: TensorDictBase) -> Tuple[Tensor, Tensor]: _run_checks=False, ) with set_exploration_mode("mode"): - target_value = self.value_function.value_estimate( + target_value = self.value_estimator.value_estimate( tensordict, target_params=target_params ).squeeze(-1) @@ -383,8 +383,8 @@ def _get_value_v2(self, tensordict, _alpha, actor_params, qval_params): sample_log_prob = sample_log_prob.unsqueeze(-1) state_value = state_action_value - _alpha * sample_log_prob state_value = state_value.min(0)[0] - tensordict.set(("next", self.value_function.value_key), state_value) - target_value = self.value_function.value_estimate( + tensordict.set(("next", self.value_estimator.value_key), state_value) + target_value = self.value_estimator.value_estimate( tensordict, _alpha=self._alpha, actor_params=self.target_actor_network_params, @@ -668,8 +668,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: * (next_state_action_value_qvalue.min(0)[0] - self.alpha * logp_pi[1]) ).sum(dim=-1, keepdim=True) - tensordict_select.set(("next", self.value_function.value_key), pred_next_val) - target_value = self.value_function.value_estimate(tensordict_select).squeeze(-1) + tensordict_select.set(("next", self.value_estimator.value_key), pred_next_val) + target_value = self.value_estimator.value_estimate(tensordict_select).squeeze( + -1 + ) actions = torch.argmax(tensordict_select["action"], dim=-1) @@ -737,14 +739,14 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): if hasattr(self, "gamma"): hp["gamma"] = self.gamma if value_type is ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( **hp, value_network=value_net, value_target_key="value_target", value_key=value_key, ) elif value_type is ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( **hp, value_network=value_net, value_target_key="value_target", @@ -755,7 +757,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." 
) elif value_type is ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( **hp, value_network=value_net, value_target_key="value_target", diff --git a/torchrl/objectives/td3.py b/torchrl/objectives/td3.py index 1f8ed97e37f..41043852ada 100644 --- a/torchrl/objectives/td3.py +++ b/torchrl/objectives/td3.py @@ -195,7 +195,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: next_state_value = next_state_action_value_qvalue.min(0)[0] tensordict.set(("next", "state_action_value"), next_state_value.unsqueeze(-1)) - target_value = self.value_function.value_estimate(tensordict).squeeze(-1) + target_value = self.value_estimator.value_estimate(tensordict).squeeze(-1) pred_val = state_action_value_qvalue td_error = (pred_val - target_value).pow(2) loss_qval = ( @@ -237,11 +237,11 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): value_key = "state_action_value" # we do not need a value network bc the next state value is already passed if value_type == ValueEstimators.TD1: - self._value_function = TD1Estimator( + self._value_estimator = TD1Estimator( value_network=None, value_key=value_key, **hp ) elif value_type == ValueEstimators.TD0: - self._value_function = TD0Estimator( + self._value_estimator = TD0Estimator( value_network=None, value_key=value_key, **hp ) elif value_type == ValueEstimators.GAE: @@ -249,7 +249,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): f"Value type {value_type} it not implemented for loss {type(self)}." ) elif value_type == ValueEstimators.TDLambda: - self._value_function = TDLambdaEstimator( + self._value_estimator = TDLambdaEstimator( value_network=None, value_key=value_key, **hp ) else: diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 63891c0fe27..250087a34a4 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -34,10 +34,10 @@ class ValueEstimators(Enum): """ - TD0 = 1 - TD1 = 2 - TDLambda = 3 - GAE = 4 + TD0 = "Bootstrapped TD (1-step return)" + TD1 = "TD(1) (infinity-step return)" + TDLambda = "TD(lambda)" + GAE = "Generalized advantage estimate" def default_value_kwargs(value_type: ValueEstimators): diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index a8f681c2163..e3dada133c2 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -4,7 +4,6 @@ # LICENSE file in the root directory of this source tree. import abc import warnings -from copy import deepcopy from functools import wraps from typing import Callable, List, Optional, Tuple, Union @@ -17,9 +16,9 @@ from torchrl.objectives.utils import hold_out_net from torchrl.objectives.value.functional import ( - td_advantage_estimate, td_lambda_advantage_estimate, vec_generalized_advantage_estimate, + vec_td1_advantage_estimate, vec_td_lambda_advantage_estimate, ) @@ -114,7 +113,9 @@ def is_stateless(self): class TD0Estimator(ValueEstimatorBase): - """Myopic Temporal Difference (TD(0)) estimate of advantage function. + """Temporal Difference (TD(0)) estimate of advantage function. + + AKA bootstrapped temporal difference or 1-step return. Args: gamma (scalar): exponential mean discount. @@ -296,7 +297,7 @@ def value_estimate( class TD1Estimator(ValueEstimatorBase): - """Bootstrapped Temporal Difference (TD(1)) estimate of advantage function. + r""":math:`\infty`-Temporal Difference (TD(1)) estimate of advantage function. 
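With the enum values above now documenting what each estimator computes, switching the estimator of any loss that implements ``make_value_estimator(value_type, **hyperparams)`` reduces to a single call. The helper below is an illustrative sketch and not part of the patch; ``configure_value_estimator`` and ``loss_module`` are placeholder names for any loss following the signature shown in the hunks above.

from torchrl.objectives.utils import ValueEstimators

def configure_value_estimator(loss_module, kind: str = "td_lambda"):
    # ``loss_module`` is assumed to expose make_value_estimator(value_type, **hyperparams)
    if kind == "td0":
        # bootstrapped 1-step return
        loss_module.make_value_estimator(ValueEstimators.TD0, gamma=0.99)
    elif kind == "td1":
        # infinity-step (Monte-Carlo-like) return
        loss_module.make_value_estimator(ValueEstimators.TD1, gamma=0.99)
    elif kind == "td_lambda":
        # exponentially weighted mixture of n-step returns
        loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=0.99, lmbda=0.95)
    elif kind == "gae":
        loss_module.make_value_estimator(ValueEstimators.GAE, gamma=0.99, lmbda=0.95)
    else:
        raise ValueError(f"Unknown estimator kind: {kind}")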
Args: gamma (scalar): exponential mean discount. @@ -471,7 +472,7 @@ def value_estimate( next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) - value_target = td_advantage_estimate( + value_target = vec_td1_advantage_estimate( gamma, torch.zeros_like(next_value), next_value, reward, done ) return value_target @@ -927,9 +928,11 @@ def new_init(self, *args, **kwargs): cls.__init__ = new_init -TD0Estimate = deepcopy(TD0Estimator) +TD0Estimate = type("TD0Estimate", TD0Estimator.__bases__, dict(TD0Estimator.__dict__)) _deprecate_class(TD0Estimate, TD0Estimator) -TD1Estimate = deepcopy(TD1Estimator) +TD1Estimate = type("TD1Estimate", TD1Estimator.__bases__, dict(TD1Estimator.__dict__)) _deprecate_class(TD1Estimate, TD1Estimator) -TDLambdaEstimate = deepcopy(TDLambdaEstimator) +TDLambdaEstimate = type( + "TDLambdaEstimate", TDLambdaEstimator.__bases__, dict(TDLambdaEstimator.__dict__) +) _deprecate_class(TDLambdaEstimate, TDLambdaEstimator) diff --git a/torchrl/objectives/value/functional.py b/torchrl/objectives/value/functional.py index 534eb47306d..b7afeea8664 100644 --- a/torchrl/objectives/value/functional.py +++ b/torchrl/objectives/value/functional.py @@ -10,15 +10,24 @@ __all__ = [ "generalized_advantage_estimate", "vec_generalized_advantage_estimate", - "vec_td_lambda_return_estimate", - "vec_td_lambda_advantage_estimate", + "td0_advantage_estimate", + "td0_return_estimate", + "td1_return_estimate", + "vec_td1_return_estimate", + "td1_advantage_estimate", + "vec_td1_advantage_estimate", "td_lambda_return_estimate", + "vec_td_lambda_return_estimate", "td_lambda_advantage_estimate", - "td_advantage_estimate", + "vec_td_lambda_advantage_estimate", ] from torchrl.objectives.value.utils import _custom_conv1d, _make_gammas_tensor +######################################################################## +# GAE +# --- + def generalized_advantage_estimate( gamma: float, @@ -28,7 +37,7 @@ def generalized_advantage_estimate( reward: torch.Tensor, done: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: - """Get generalized advantage estimate of a trajectory. + """Generalized advantage estimate of a trajectory. Refer to "HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION" https://arxiv.org/pdf/1506.02438.pdf for more context. @@ -37,13 +46,14 @@ def generalized_advantage_estimate( gamma (scalar): exponential mean discount. lmbda (scalar): trajectory discount. state_value (Tensor): value function result with old_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor next_state_value (Tensor): value function result with new_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor reward (Tensor): reward of taking actions in the environment. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): raise RuntimeError( @@ -84,7 +94,7 @@ def vec_generalized_advantage_estimate( reward: torch.Tensor, done: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: - """Get generalized advantage estimate of a trajectory. + """Vectorized Generalized advantage estimate of a trajectory. 
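For reference, the recursion these two functions implement (Schulman et al., 2015) can be reproduced in a few lines of plain PyTorch under the ``[*Batch x TimeSteps x F]`` convention documented above. The helper below is an illustration only and is not part of the patch or of the library.

import torch

def gae_reference(gamma, lmbda, state_value, next_state_value, reward, done):
    # All tensors are expected to have shape [*B, T, 1].
    not_done = 1.0 - done.to(state_value.dtype)
    # one-step temporal-difference error
    delta = reward + gamma * not_done * next_state_value - state_value
    advantage = torch.zeros_like(state_value)
    running = torch.zeros_like(state_value[..., -1, :])
    T = state_value.shape[-2]
    for t in reversed(range(T)):
        # A_t = delta_t + gamma * lambda * A_{t+1}, reset at episode ends
        running = delta[..., t, :] + gamma * lmbda * not_done[..., t, :] * running
        advantage[..., t, :] = running
    value_target = advantage + state_value
    return advantage, value_target

# toy check on a [1, 5, 1] trajectory
reward = torch.randn(1, 5, 1)
done = torch.zeros(1, 5, 1, dtype=torch.bool)
state_value = torch.randn(1, 5, 1)
next_state_value = torch.randn(1, 5, 1)
adv, target = gae_reference(0.99, 0.95, state_value, next_state_value, reward, done)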
Refer to "HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION" https://arxiv.org/pdf/1506.02438.pdf for more context. @@ -93,13 +103,14 @@ def vec_generalized_advantage_estimate( gamma (scalar): exponential mean discount. lmbda (scalar): trajectory discount. state_value (Tensor): value function result with old_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor next_state_value (Tensor): value function result with new_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor reward (Tensor): reward of taking actions in the environment. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): raise RuntimeError( @@ -148,61 +159,90 @@ def vec_generalized_advantage_estimate( return advantage, value_target -def td_advantage_estimate( +######################################################################## +# TD(0) +# ----- + + +def td0_advantage_estimate( gamma: float, state_value: torch.Tensor, next_state_value: torch.Tensor, reward: torch.Tensor, done: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: - """Get generalized advantage estimate of a trajectory. + """TD(0) advantage estimate of a trajectory. - Refer to "HIGH-DIMENSIONAL CONTINUOUS CONTROL USING GENERALIZED ADVANTAGE ESTIMATION" - https://arxiv.org/pdf/1506.02438.pdf for more context. + Also known as bootstrapped Temporal Difference or one-step return. Args: gamma (scalar): exponential mean discount. state_value (Tensor): value function result with old_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor next_state_value (Tensor): value function result with new_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor reward (Tensor): reward of taking actions in the environment. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): raise RuntimeError( "All input tensors (value, reward and done states) must share a unique shape." ) - for tensor in (next_state_value, state_value, reward, done): - if tensor.shape[-1] != 1: - raise RuntimeError( - "Last dimension of generalized_advantage_estimate inputs must be a singleton dimension." - ) not_done = 1 - done.to(next_state_value.dtype) advantage = reward + gamma * not_done * next_state_value - state_value return advantage -def td_lambda_return_estimate( +def td0_return_estimate( gamma: float, - lmbda: float, next_state_value: torch.Tensor, reward: torch.Tensor, done: torch.Tensor, - rolling_gamma: bool = None, -) -> torch.Tensor: - """TD(lambda) return estimate. +) -> Tuple[torch.Tensor, torch.Tensor]: + """TD(0) discounted return estimate of a trajectory. + + Also known as bootstrapped Temporal Difference or one-step return. Args: gamma (scalar): exponential mean discount. - lmbda (scalar): trajectory discount. next_state_value (Tensor): value function result with new_state input. 
must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor reward (Tensor): reward of taking actions in the environment. must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. + + """ + if not (next_state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) + not_done = 1 - done.to(next_state_value.dtype) + advantage = reward + gamma * not_done * next_state_value + return advantage + + +######################################################################## +# TD(1) +# ---------- + + +def td1_return_estimate( + gamma: float, + next_state_value: torch.Tensor, + reward: torch.Tensor, + done: torch.Tensor, + rolling_gamma: bool = None, +) -> torch.Tensor: + r"""TD(1) return estimate. + + Args: + gamma (scalar): exponential mean discount. + next_state_value (Tensor): value function result with new_state input. + reward (Tensor): reward of taking actions in the environment. + done (Tensor): boolean flag for end of episode. rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: gamma = [g1, g2, g3, g4] @@ -225,84 +265,65 @@ def td_lambda_return_estimate( ] Default is True. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == reward.shape == done.shape): raise RuntimeError( "All input tensors (value, reward and done states) must share a unique shape." ) - for tensor in (next_state_value, reward, done): - if tensor.shape[-1] != 1: - raise RuntimeError( - "Last dimension of generalized_advantage_estimate inputs must be a singleton dimension." 
- ) not_done = 1 - done.to(next_state_value.dtype) returns = torch.empty_like(next_state_value) T = returns.shape[-2] - # if gamma is not a tensor of the same shape as other inputs, we use rolling_gamma = True single_gamma = False if not (isinstance(gamma, torch.Tensor) and gamma.shape == not_done.shape): single_gamma = True gamma = torch.full_like(next_state_value, gamma) - single_lambda = False - if not (isinstance(lmbda, torch.Tensor) and lmbda.shape == not_done.shape): - single_lambda = True - lmbda = torch.full_like(next_state_value, lmbda) - if rolling_gamma is None: rolling_gamma = True - elif not rolling_gamma and single_gamma and single_lambda: + elif not rolling_gamma and single_gamma: raise RuntimeError( - "rolling_gamma=False is expected only with time-sensitive gamma or lambda values" + "rolling_gamma=False is expected only with time-sensitive gamma values" ) if rolling_gamma: gamma = gamma * not_done g = next_state_value[..., -1, :] for i in reversed(range(T)): - g = returns[..., i, :] = reward[..., i, :] + gamma[..., i, :] * ( - (1 - lmbda[..., i, :]) * next_state_value[..., i, :] - + lmbda[..., i, :] * g - ) + g = returns[..., i, :] = reward[..., i, :] + gamma[..., i, :] * g else: for k in range(T): g = next_state_value[..., -1, :] _gamma = gamma[..., k, :] - _lambda = lmbda[..., k, :] nd = not_done _gamma = _gamma.unsqueeze(-2) * nd for i in reversed(range(k, T)): - g = reward[..., i, :] + _gamma[..., i, :] * ( - (1 - _lambda) * next_state_value[..., i, :] + _lambda * g - ) + g = reward[..., i, :] + _gamma[..., i, :] * g returns[..., k, :] = g - return returns -def td_lambda_advantage_estimate( +def td1_advantage_estimate( gamma: float, - lmbda: float, state_value: torch.Tensor, next_state_value: torch.Tensor, reward: torch.Tensor, done: torch.Tensor, rolling_gamma: bool = None, ) -> torch.Tensor: - """TD(lambda) advantage estimate. + """TD(1) advantage estimate. Args: gamma (scalar): exponential mean discount. - lmbda (scalar): trajectory discount. state_value (Tensor): value function result with old_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor next_state_value (Tensor): value function result with new_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor reward (Tensor): reward of taking actions in the environment. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: @@ -326,6 +347,10 @@ def td_lambda_advantage_estimate( ] Default is True. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): raise RuntimeError( @@ -333,34 +358,73 @@ def td_lambda_advantage_estimate( ) if not state_value.shape == next_state_value.shape: raise RuntimeError("shape of state_value and next_state_value must match") - returns = td_lambda_return_estimate( - gamma, lmbda, next_state_value, reward, done, rolling_gamma - ) + returns = td1_return_estimate(gamma, next_state_value, reward, done, rolling_gamma) advantage = returns - state_value return advantage -def vec_td_lambda_advantage_estimate( +def vec_td1_return_estimate( + gamma, next_state_value, reward, done, rolling_gamma: Optional[bool] = None +): + """Vectorized TD(1) return estimate. 
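The TD(1) return introduced above is the discounted reward-to-go, bootstrapped with the value of the last next-state and cut at episode boundaries. A minimal plain-PyTorch sketch, with an illustrative helper name that is not part of the patch, reads as follows.

import torch

def td1_return_reference(gamma, next_state_value, reward, done):
    # Shapes follow the [*B, T, F] convention used in this file.
    not_done = 1.0 - done.to(next_state_value.dtype)
    returns = torch.empty_like(next_state_value)
    # bootstrap with the value of the very last "next" observation
    g = next_state_value[..., -1, :]
    for t in reversed(range(reward.shape[-2])):
        g = reward[..., t, :] + gamma * not_done[..., t, :] * g
        returns[..., t, :] = g
    return returns

# the TD(1) advantage is then simply the excess return over the current value:
# advantage = td1_return_reference(gamma, next_state_value, reward, done) - state_value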
+ + Args: + gamma (scalar, Tensor): exponential mean discount. If tensor-valued, + next_state_value (Tensor): value function result with new_state input. + reward (Tensor): reward of taking actions in the environment. + done (Tensor): boolean flag for end of episode. + rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma + if a gamma tensor is tied to a single event: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1 g2 v3 + g1 g2 g3 v4, + v2 + g2 v3 + g2 g3 v4, + v3 + g3 v4, + v4, + ] + if False, it is assumed that each gamma is tied to the upcoming + trajectory: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1**2 v3 + g**3 v4, + v2 + g2 v3 + g2**2 v4, + v3 + g3 v4, + v4, + ] + Default is True. + + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + + """ + return vec_td_lambda_return_estimate( + gamma=gamma, + next_state_value=next_state_value, + reward=reward, + done=done, + rolling_gamma=rolling_gamma, + lmbda=1, + ) + + +def vec_td1_advantage_estimate( gamma, - lmbda, state_value, next_state_value, reward, done, rolling_gamma: bool = None, ): - """Vectorized TD(lambda) advantage estimate. + """Vectorized TD(1) advantage estimate. Args: gamma (scalar, Tensor): exponential mean discount. If tensor-valued, - must be a [Batch x TimeSteps x 1] tensor. - lmbda (scalar): trajectory discount. state_value (Tensor): value function result with old_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor next_state_value (Tensor): value function result with new_state input. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor reward (Tensor): reward of taking actions in the environment. - must be a [Batch x TimeSteps x 1] or [Batch x TimeSteps] tensor done (Tensor): boolean flag for end of episode. rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma if a gamma tensor is tied to a single event: @@ -384,23 +448,188 @@ def vec_td_lambda_advantage_estimate( ] Default is True. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): raise RuntimeError( "All input tensors (value, reward and done states) must share a unique shape." ) return ( - vec_td_lambda_return_estimate( - gamma, lmbda, next_state_value, reward, done, rolling_gamma - ) + vec_td1_return_estimate(gamma, next_state_value, reward, done, rolling_gamma) - state_value ) +######################################################################## +# TD(lambda) +# ---------- + + +def td_lambda_return_estimate( + gamma: float, + lmbda: float, + next_state_value: torch.Tensor, + reward: torch.Tensor, + done: torch.Tensor, + rolling_gamma: bool = None, +) -> torch.Tensor: + r"""TD(:math:`\lambda`) return estimate. + + Args: + gamma (scalar): exponential mean discount. + lmbda (scalar): trajectory discount. + next_state_value (Tensor): value function result with new_state input. + reward (Tensor): reward of taking actions in the environment. + done (Tensor): boolean flag for end of episode. 
+ rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma + if a gamma tensor is tied to a single event: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1 g2 v3 + g1 g2 g3 v4, + v2 + g2 v3 + g2 g3 v4, + v3 + g3 v4, + v4, + ] + if False, it is assumed that each gamma is tied to the upcoming + trajectory: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1**2 v3 + g**3 v4, + v2 + g2 v3 + g2**2 v4, + v3 + g3 v4, + v4, + ] + Default is True. + + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + + """ + if not (next_state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) + for tensor in (next_state_value, reward, done): + if tensor.shape[-1] != 1: + raise RuntimeError( + "Last dimension of generalized_advantage_estimate inputs must be a singleton dimension." + ) + not_done = 1 - done.to(next_state_value.dtype) + + returns = torch.empty_like(next_state_value) + + T = returns.shape[-2] + + # if gamma is not a tensor of the same shape as other inputs, we use rolling_gamma = True + single_gamma = False + if not (isinstance(gamma, torch.Tensor) and gamma.shape == not_done.shape): + single_gamma = True + gamma = torch.full_like(next_state_value, gamma) + + single_lambda = False + if not (isinstance(lmbda, torch.Tensor) and lmbda.shape == not_done.shape): + single_lambda = True + lmbda = torch.full_like(next_state_value, lmbda) + + if rolling_gamma is None: + rolling_gamma = True + elif not rolling_gamma and single_gamma and single_lambda: + raise RuntimeError( + "rolling_gamma=False is expected only with time-sensitive gamma or lambda values" + ) + + if rolling_gamma: + gamma = gamma * not_done + g = next_state_value[..., -1, :] + for i in reversed(range(T)): + g = returns[..., i, :] = reward[..., i, :] + gamma[..., i, :] * ( + (1 - lmbda[..., i, :]) * next_state_value[..., i, :] + + lmbda[..., i, :] * g + ) + else: + for k in range(T): + g = next_state_value[..., -1, :] + _gamma = gamma[..., k, :] + _lambda = lmbda[..., k, :] + nd = not_done + _gamma = _gamma.unsqueeze(-2) * nd + for i in reversed(range(k, T)): + g = reward[..., i, :] + _gamma[..., i, :] * ( + (1 - _lambda) * next_state_value[..., i, :] + _lambda * g + ) + returns[..., k, :] = g + + return returns + + +def td_lambda_advantage_estimate( + gamma: float, + lmbda: float, + state_value: torch.Tensor, + next_state_value: torch.Tensor, + reward: torch.Tensor, + done: torch.Tensor, + rolling_gamma: bool = None, +) -> torch.Tensor: + r"""TD(:math:`\lambda`) advantage estimate. + + Args: + gamma (scalar): exponential mean discount. + lmbda (scalar): trajectory discount. + state_value (Tensor): value function result with old_state input. + next_state_value (Tensor): value function result with new_state input. + reward (Tensor): reward of taking actions in the environment. + done (Tensor): boolean flag for end of episode. 
+ rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma + if a gamma tensor is tied to a single event: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1 g2 v3 + g1 g2 g3 v4, + v2 + g2 v3 + g2 g3 v4, + v3 + g3 v4, + v4, + ] + if False, it is assumed that each gamma is tied to the upcoming + trajectory: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1**2 v3 + g**3 v4, + v2 + g2 v3 + g2**2 v4, + v3 + g3 v4, + v4, + ] + Default is True. + + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + + """ + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." + ) + if not state_value.shape == next_state_value.shape: + raise RuntimeError("shape of state_value and next_state_value must match") + returns = td_lambda_return_estimate( + gamma, lmbda, next_state_value, reward, done, rolling_gamma + ) + advantage = returns - state_value + return advantage + + def vec_td_lambda_return_estimate( gamma, lmbda, next_state_value, reward, done, rolling_gamma: Optional[bool] = None ): - """Vectorized TD(lambda) return estimate. + r"""Vectorized TD(:math:`\lambda`) return estimate. Args: gamma (scalar, Tensor): exponential mean discount. If tensor-valued, @@ -433,6 +662,10 @@ def vec_td_lambda_return_estimate( ] Default is True. + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + """ if not (next_state_value.shape == reward.shape == done.shape): raise RuntimeError( @@ -538,3 +771,60 @@ def vec_td_lambda_return_estimate( v3[..., :-1] = 0 v3 = _custom_conv1d(v3, dec * (gammas * lambdas).transpose(1, 2)) return (v1 + v2 + v3).view(shape) + + +def vec_td_lambda_advantage_estimate( + gamma, + lmbda, + state_value, + next_state_value, + reward, + done, + rolling_gamma: bool = None, +): + r"""Vectorized TD(:math:`\lambda`) advantage estimate. + + Args: + gamma (scalar, Tensor): exponential mean discount. If tensor-valued, + lmbda (scalar): trajectory discount. + state_value (Tensor): value function result with old_state input. + next_state_value (Tensor): value function result with new_state input. + reward (Tensor): reward of taking actions in the environment. + done (Tensor): boolean flag for end of episode. + rolling_gamma (bool, optional): if ``True``, it is assumed that each gamma + if a gamma tensor is tied to a single event: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1 g2 v3 + g1 g2 g3 v4, + v2 + g2 v3 + g2 g3 v4, + v3 + g3 v4, + v4, + ] + if False, it is assumed that each gamma is tied to the upcoming + trajectory: + gamma = [g1, g2, g3, g4] + value = [v1, v2, v3, v4] + return = [ + v1 + g1 v2 + g1**2 v3 + g**3 v4, + v2 + g2 v3 + g2**2 v4, + v3 + g3 v4, + v4, + ] + Default is True. + + All tensors (values, reward and done) must have shape + ``[*Batch x TimeSteps x F]``, with ``F`` features (for single agent, + single task, single objective F=1). + + """ + if not (next_state_value.shape == state_value.shape == reward.shape == done.shape): + raise RuntimeError( + "All input tensors (value, reward and done states) must share a unique shape." 
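The TD(lambda) recursion above interpolates between the bootstrapped TD(0) target (lambda=0) and the TD(1) return (lambda=1). A small plain-PyTorch sketch, with an illustrative helper name and not part of the patch, makes the lambda=0 limit explicit.

import torch

def td_lambda_return_reference(gamma, lmbda, next_state_value, reward, done):
    # Recursive form of the TD(lambda) return:
    #     G_t = r_t + gamma * [(1 - lambda) * V(s_{t+1}) + lambda * G_{t+1}]
    not_done = 1.0 - done.to(next_state_value.dtype)
    returns = torch.empty_like(next_state_value)
    g = next_state_value[..., -1, :]
    for t in reversed(range(reward.shape[-2])):
        g = reward[..., t, :] + gamma * not_done[..., t, :] * (
            (1 - lmbda) * next_state_value[..., t, :] + lmbda * g
        )
        returns[..., t, :] = g
    return returns

# lambda = 0 collapses to the bootstrapped TD(0) target r + gamma * V(s'):
reward = torch.randn(1, 4, 1)
done = torch.zeros(1, 4, 1, dtype=torch.bool)
next_value = torch.randn(1, 4, 1)
assert torch.allclose(
    td_lambda_return_reference(0.99, 0.0, next_value, reward, done),
    reward + 0.99 * next_value,
)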
+ ) + return ( + vec_td_lambda_return_estimate( + gamma, lmbda, next_state_value, reward, done, rolling_gamma + ) + - state_value + ) From 400cfd13850ae455f42d10d5ec2bce6342ceb98f Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 14:56:33 +0100 Subject: [PATCH 24/89] amend --- examples/a2c/a2c.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/a2c/a2c.py b/examples/a2c/a2c.py index 2ce1a32336b..7d821abea04 100644 --- a/examples/a2c/a2c.py +++ b/examples/a2c/a2c.py @@ -10,7 +10,7 @@ from hydra.core.config_store import ConfigStore from torchrl.envs.transforms import RewardScaling from torchrl.envs.utils import set_exploration_mode -from torchrl.objectives.value import TDEstimate +from torchrl.objectives.value import TD0Estimate from torchrl.record.loggers import generate_exp_name, get_logger from torchrl.trainers.helpers.collectors import ( make_collector_onpolicy, @@ -144,7 +144,7 @@ def main(cfg: "DictConfig"): # noqa: F821 ) critic_model = model.get_value_operator() - advantage = TDEstimate( + advantage = TD0Estimate( cfg.gamma, value_network=critic_model, average_rewards=True, From 64768b07ad0a7a35aa5c2b2c304aa7d4dc7121e4 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 15:27:59 +0100 Subject: [PATCH 25/89] amend --- torchrl/objectives/value/advantages.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index e3dada133c2..6fe2ed7b0ca 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -16,10 +16,10 @@ from torchrl.objectives.utils import hold_out_net from torchrl.objectives.value.functional import ( - td_lambda_advantage_estimate, + td_lambda_return_estimate, vec_generalized_advantage_estimate, - vec_td1_advantage_estimate, - vec_td_lambda_advantage_estimate, + vec_td1_return_estimate, + vec_td_lambda_return_estimate, ) @@ -472,9 +472,7 @@ def value_estimate( next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) - value_target = vec_td1_advantage_estimate( - gamma, torch.zeros_like(next_value), next_value, reward, done - ) + value_target = vec_td1_return_estimate(gamma, next_value, reward, done) return value_target @@ -665,13 +663,9 @@ def value_estimate( done = tensordict.get(("next", "done")) if self.vectorized: - val = vec_td_lambda_advantage_estimate( - gamma, lmbda, torch.zeros_like(next_value), next_value, reward, done - ) + val = vec_td_lambda_return_estimate(gamma, lmbda, next_value, reward, done) else: - val = td_lambda_advantage_estimate( - gamma, lmbda, torch.zeros_like(next_value), next_value, reward, done - ) + val = td_lambda_return_estimate(gamma, lmbda, next_value, reward, done) return val From f5550df8b2ca5594bca996e4c5189119bb341eee Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 16:14:29 +0100 Subject: [PATCH 26/89] amend --- examples/a2c/a2c.py | 7 ++-- examples/ppo/ppo.py | 3 +- torchrl/objectives/value/advantages.py | 51 ++++++++++++++++++-------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/examples/a2c/a2c.py b/examples/a2c/a2c.py index 7d821abea04..0a2783127cf 100644 --- a/examples/a2c/a2c.py +++ b/examples/a2c/a2c.py @@ -10,7 +10,7 @@ from hydra.core.config_store import ConfigStore from torchrl.envs.transforms import RewardScaling from torchrl.envs.utils import set_exploration_mode -from torchrl.objectives.value import TD0Estimate +from torchrl.objectives.value import TD0Estimator from 
torchrl.record.loggers import generate_exp_name, get_logger from torchrl.trainers.helpers.collectors import ( make_collector_onpolicy, @@ -144,14 +144,15 @@ def main(cfg: "DictConfig"): # noqa: F821 ) critic_model = model.get_value_operator() - advantage = TD0Estimate( + advantage = TD0Estimator( cfg.gamma, value_network=critic_model, average_rewards=True, + differentiable=True, ) trainer.register_op( "process_optim_batch", - advantage, + torch.no_grad()(advantage), ) final_seed = collector.set_seed(cfg.seed) diff --git a/examples/ppo/ppo.py b/examples/ppo/ppo.py index 3c9bb8e4a1e..e7ce860f173 100644 --- a/examples/ppo/ppo.py +++ b/examples/ppo/ppo.py @@ -168,10 +168,11 @@ def main(cfg: "DictConfig"): # noqa: F821 cfg.lmbda, value_network=critic_model, average_gae=True, + differentiable=True, ) trainer.register_op( "process_optim_batch", - lambda tensordict: advantage(tensordict.to(device)), + lambda tensordict: torch.no_grad()(advantage(tensordict.to(device))), ) trainer._process_optim_batch_ops = [ trainer._process_optim_batch_ops[-1], diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 6fe2ed7b0ca..a6081a71b8c 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -46,6 +46,10 @@ class ValueEstimatorBase(nn.Module): value_network: Union[TensorDictModule, Callable] value_key: Union[Tuple[str], str] + DIFF_DEPREC_MSG = "differentiable=False will soon be deprecated and all value computations will be made" \ + "differentiable. " \ + "Consider using differentiable=True and " \ + "decorate your function with `torch.no_grad()` or pass detached functional parameters." @abc.abstractmethod def forward( @@ -54,7 +58,7 @@ def forward( params: Optional[TensorDictBase] = None, target_params: Optional[TensorDictBase] = None, ) -> TensorDictBase: - """Computes the a value estimate given the data in tensordict. + """Computes the advantage estimate given the data in tensordict. If a functional module is provided, a nested TensorDict containing the parameters (and if relevant the target parameters) can be passed to the module. @@ -123,8 +127,12 @@ class TD0Estimator(ValueEstimatorBase): the value estimates. average_rewards (bool, optional): if ``True``, rewards will be standardized before the TD is computed. - differentiable (bool, optional): if ``True``, gradients are propagated throught + differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: + The proper way to make the function call non-differentiable is to + decorate it in a `torch.no_grad()` context manager/decorator or + pass detached parameters for functional modules. advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. @@ -155,6 +163,8 @@ def __init__( self.average_rewards = average_rewards self.differentiable = differentiable + if not differentiable: + warnings.warn(self.DIFF_DEPREC_MSG) self.value_key = value_key if ( hasattr(value_network, "out_keys") @@ -187,7 +197,7 @@ def forward( params: Optional[TensorDictBase] = None, target_params: Optional[TensorDictBase] = None, ) -> TensorDictBase: - """Computes the TDEstimate given the data in tensordict. + """Computes the TD(0) advantage given the data in tensordict. 
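The deprecation message introduced here points to two equivalent patterns: disabling gradients at call time, or passing detached functional parameters. A minimal sketch of both follows, where ``compute_advantage_no_grad`` and ``advantage_module`` are illustrative names.

import torch

def compute_advantage_no_grad(advantage_module, tensordict):
    # ``advantage_module`` stands for any value estimator built with
    # ``differentiable=True`` (e.g. GAE or TD0Estimator); running it under
    # torch.no_grad() keeps the estimate out of the autograd graph, as the
    # deprecation message above recommends.
    with torch.no_grad():
        return advantage_module(tensordict)

# For functional modules, the alternative is to pass detached parameters instead:
#     advantage_module(tensordict, params=params.detach(), target_params=target_params)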
If a functional module is provided, a nested TensorDict containing the parameters (and if relevant the target parameters) can be passed to the module. @@ -214,7 +224,6 @@ def forward( >>> module = TDEstimate( ... gamma=0.98, ... value_network=value_net, - ... differentiable=False, ... ) >>> obs, next_obs = torch.randn(2, 1, 10, 3) >>> reward = torch.randn(1, 10, 1) @@ -232,7 +241,6 @@ def forward( >>> module = TDEstimate( ... gamma=0.98, ... value_network=value_net, - ... differentiable=False, ... ) >>> obs, next_obs = torch.randn(2, 1, 10, 3) >>> reward = torch.randn(1, 10, 1) @@ -304,8 +312,12 @@ class TD1Estimator(ValueEstimatorBase): value_network (TensorDictModule): value operator used to retrieve the value estimates. average_rewards (bool, optional): if ``True``, rewards will be standardized before the TD is computed. - differentiable (bool, optional): if ``True``, gradients are propagated throught + differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: + The proper way to make the function call non-differentiable is to + decorate it in a `torch.no_grad()` context manager/decorator or + pass detached parameters for functional modules. advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. @@ -336,6 +348,8 @@ def __init__( self.average_rewards = average_rewards self.differentiable = differentiable + if not differentiable: + warnings.warn(self.DIFF_DEPREC_MSG) self.value_key = value_key if ( hasattr(value_network, "out_keys") @@ -367,7 +381,7 @@ def forward( params: Optional[TensorDictBase] = None, target_params: Optional[TensorDictBase] = None, ) -> TensorDictBase: - """Computes the TDEstimate given the data in tensordict. + """Computes the TD(1) advantage given the data in tensordict. If a functional module is provided, a nested TensorDict containing the parameters (and if relevant the target parameters) can be passed to the module. @@ -394,7 +408,6 @@ def forward( >>> module = TDEstimate( ... gamma=0.98, ... value_network=value_net, - ... differentiable=False, ... ) >>> obs, next_obs = torch.randn(2, 1, 10, 3) >>> reward = torch.randn(1, 10, 1) @@ -412,7 +425,6 @@ def forward( >>> module = TDEstimate( ... gamma=0.98, ... value_network=value_net, - ... differentiable=False, ... ) >>> obs, next_obs = torch.randn(2, 1, 10, 3) >>> reward = torch.randn(1, 10, 1) @@ -485,8 +497,12 @@ class TDLambdaEstimator(ValueEstimatorBase): value_network (TensorDictModule): value operator used to retrieve the value estimates. average_rewards (bool, optional): if ``True``, rewards will be standardized before the TD is computed. - differentiable (bool, optional): if ``True``, gradients are propagated throught + differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: + The proper way to make the function call non-differentiable is to + decorate it in a `torch.no_grad()` context manager/decorator or + pass detached parameters for functional modules. vectorized (bool, optional): whether to use the vectorized version of the lambda return. Default is `True`. advantage_key (str or tuple of str, optional): the key of the advantage entry. 
@@ -523,6 +539,8 @@ def __init__( self.average_rewards = average_rewards self.differentiable = differentiable + if not differentiable: + warnings.warn(self.DIFF_DEPREC_MSG) self.value_key = value_key if ( hasattr(value_network, "out_keys") @@ -554,7 +572,7 @@ def forward( params: Optional[List[Tensor]] = None, target_params: Optional[List[Tensor]] = None, ) -> TensorDictBase: - """Computes the TDLambdaEstimate given the data in tensordict. + r"""Computes the TD(:math:`\lambda`) advantage given the data in tensordict. If a functional module is provided, a nested TensorDict containing the parameters (and if relevant the target parameters) can be passed to the module. @@ -582,7 +600,6 @@ def forward( ... gamma=0.98, ... lmbda=0.94, ... value_network=value_net, - ... differentiable=False, ... ) >>> obs, next_obs = torch.randn(2, 1, 10, 3) >>> reward = torch.randn(1, 10, 1) @@ -601,7 +618,6 @@ def forward( ... gamma=0.98, ... lmbda=0.94, ... value_network=value_net, - ... differentiable=False, ... ) >>> obs, next_obs = torch.randn(2, 1, 10, 3) >>> reward = torch.randn(1, 10, 1) @@ -681,8 +697,12 @@ class GAE(ValueEstimatorBase): value_network (TensorDictModule): value operator used to retrieve the value estimates. average_gae (bool): if ``True``, the resulting GAE values will be standardized. Default is ``False``. - differentiable (bool, optional): if ``True``, gradients are propagated throught + differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: + The proper way to make the function call non-differentiable is to + decorate it in a `torch.no_grad()` context manager/decorator or + pass detached parameters for functional modules. advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. @@ -730,7 +750,8 @@ def __init__( self.average_gae = average_gae self.differentiable = differentiable - + if not differentiable: + warnings.warn(self.DIFF_DEPREC_MSG) self.advantage_key = advantage_key self.value_target_key = value_target_key From 218ab1ae63fc4d488787e15cc267255488b09683 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 16:19:59 +0100 Subject: [PATCH 27/89] amend --- examples/a2c/a2c.py | 2 +- torchrl/objectives/value/advantages.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/a2c/a2c.py b/examples/a2c/a2c.py index 0a2783127cf..f6d3de8b29d 100644 --- a/examples/a2c/a2c.py +++ b/examples/a2c/a2c.py @@ -145,7 +145,7 @@ def main(cfg: "DictConfig"): # noqa: F821 critic_model = model.get_value_operator() advantage = TD0Estimator( - cfg.gamma, + gamma=cfg.gamma, value_network=critic_model, average_rewards=True, differentiable=True, diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index a6081a71b8c..f3354d59a34 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -46,10 +46,12 @@ class ValueEstimatorBase(nn.Module): value_network: Union[TensorDictModule, Callable] value_key: Union[Tuple[str], str] - DIFF_DEPREC_MSG = "differentiable=False will soon be deprecated and all value computations will be made" \ - "differentiable. " \ - "Consider using differentiable=True and " \ - "decorate your function with `torch.no_grad()` or pass detached functional parameters." 
+ DIFF_DEPREC_MSG = ( + "differentiable=False will soon be deprecated and all value computations will be made" + "differentiable. " + "Consider using differentiable=True and " + "decorate your function with `torch.no_grad()` or pass detached functional parameters." + ) @abc.abstractmethod def forward( From 6cdcc8e24b50673583488d5971b56c26625bff52 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 16:20:18 +0100 Subject: [PATCH 28/89] amend --- examples/ppo/ppo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/ppo/ppo.py b/examples/ppo/ppo.py index e7ce860f173..0a7d8b60315 100644 --- a/examples/ppo/ppo.py +++ b/examples/ppo/ppo.py @@ -164,8 +164,8 @@ def main(cfg: "DictConfig"): # noqa: F821 critic_model = model.get_value_operator() advantage = GAE( - cfg.gamma, - cfg.lmbda, + gamma=cfg.gamma, + lmbda=cfg.lmbda, value_network=critic_model, average_gae=True, differentiable=True, From b47dee21b1206e74178e5b642c7bd1bae9747e1f Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 16:53:41 +0100 Subject: [PATCH 29/89] amend --- torchrl/objectives/value/advantages.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index f3354d59a34..0f7c0a2feea 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -16,6 +16,7 @@ from torchrl.objectives.utils import hold_out_net from torchrl.objectives.value.functional import ( + td0_return_estimate, td_lambda_return_estimate, vec_generalized_advantage_estimate, vec_td1_return_estimate, @@ -302,7 +303,9 @@ def value_estimate( next_value = step_td.get(self.value_key) done = tensordict.get(("next", "done")) - value_target = reward + gamma * (1 - done.to(reward.dtype)) * next_value + value_target = td0_return_estimate( + gamma=gamma, next_state_value=next_value, reward=reward, done=done + ) return value_target From e9bb239a33c8ad7a254e1c5d9078525d86bd4b25 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 17:31:13 +0100 Subject: [PATCH 30/89] amend --- docs/source/reference/objectives.rst | 6 +- tutorials/sphinx-tutorials/coding_ddpg.py | 149 ++++++++++++++-------- 2 files changed, 98 insertions(+), 57 deletions(-) diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index ba91adc2f5e..1eb9d17bb16 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -16,13 +16,15 @@ The main characteristics of TorchRL losses are: method will receive a tensordict as input that contains all the necessary information to return a loss value. - They output a :class:`tensordict.TensorDict` instance with the loss values - written under a ``"loss_`` where ``smth`` is a string describing the + written under a ``"loss_"`` where ``smth`` is a string describing the loss. Additional keys in the tensordict may be useful metrics to log during training time. .. note:: The reason we return independent losses is to let the user use a different optimizer for different sets of parameters for instance. Summing the losses - can be simply done via ``sum(loss for key, loss in loss_vals.items() if key.startswith("loss_")``. 
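Putting the conventions above together, a generic training step only needs the ``loss_*`` entries of the output tensordict. The function below is a sketch under the assumption that ``loss_module``, ``replay_buffer`` and ``optimizer`` are any TorchRL loss, replay buffer and torch optimizer wired as described in the documentation text above.

def training_step(loss_module, replay_buffer, optimizer):
    data = replay_buffer.sample()          # a TensorDict of transitions
    loss_dict = loss_module(data)          # TensorDict with "loss_*" entries
    # sum only the loss values; other keys are metrics to log
    loss_val = sum(
        value for key, value in loss_dict.items() if key.startswith("loss_")
    )
    loss_val.backward()
    optimizer.step()
    optimizer.zero_grad()
    return loss_dict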
+ can be simply done via + + >>> loss_val = sum(loss for key, loss in loss_vals.items() if key.startswith("loss_")) Training value functions ------------------------ diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index c35bd87c41e..b39596914e7 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -25,7 +25,8 @@ # # Key learnings: # -# - how to build an environment in TorchRL, including transforms +# - how to write a loss module and customize its value estimator; +# - how to build an environment in torchrl, including transforms # (e.g. data normalization) and parallel execution; # - how to design a policy and value network; # - how to collect data from your environment efficiently and store them @@ -34,18 +35,17 @@ # - and finally how to evaluate your model. # # This tutorial assumes that you have completed the PPO tutorial which gives -# an overview of the TorchRL components. -# -# -# This tutorial assumes the reader is familiar with some of TorchRL primitives, -# such as :class:`tensordict.TensorDict` and -# :class:`tensordict.nn.TensorDictModules`, although it should be +# an overview of the torchrl components and dependencies, such as +# :class:`tensordict.TensorDict` and :class:`tensordict.nn.TensorDictModules`, +# although it should be # sufficiently transparent to be understood without a deep understanding of # these classes. # -# We do not aim at giving a SOTA implementation of the algorithm, but rather -# to provide a high-level illustration of TorchRL features in the context of -# this algorithm. +# .. note:: +# We do not aim at giving a SOTA implementation of the algorithm, but rather +# to provide a high-level illustration of torchrl's loss implementations +# and the library features that are to be used in the context of +# this algorithm. # # Imports # ------- @@ -100,56 +100,90 @@ from torchrl.trainers import Recorder ############################################################################### -# TorchRL LossModule -# ------------------ +# torchrl :class:`torchrl.objectives.LossModule` +# ---------------------------------------------- +# +# TorchRL provides a series of losses to use in your training scripts. +# The aim is to have losses that are easily reusable/swappable and that have +# a simple signature. +# +# The main characteristics of TorchRL losses are: +# +# - they are stateful objects: they contain a copy of the trainable parameters +# such that ``loss_module.parameters()`` gives whatever is needed to train the +# algorithm. +# - They follow the ``tensordict`` convention: the :meth:`torch.nn.Module.forward` +# method will receive a tensordict as input that contains all the necessary +# information to return a loss value. +# +# >>> data = replay_buffer.sample() +# >>> loss_dict = loss_module(data) +# +# - They output a :class:`tensordict.TensorDict` instance with the loss values +# written under a ``"loss_"`` where ``smth`` is a string describing the +# loss. Additional keys in the tensordict may be useful metrics to log during +# training time. +# .. note:: +# The reason we return independent losses is to let the user use a different +# optimizer for different sets of parameters for instance. Summing the losses +# can be simply done via +# +# >>> loss_val = sum(loss for key, loss in loss_dict.items() if key.startswith("loss_")) # # The ``__init__`` method # ~~~~~~~~~~~~~~~~~~~~~~~ # # The parent class of all losses is :class:`torchrl.objectives.LossModule`. 
-# As many other components of the library, its :meth:`__call__` method expects -# as input a :class:`tensordict.TensorDict` instance sampled from an expenrience -# replay buffer. Using this format makes it possible to re-use the module across +# As many other components of the library, its :meth:`torchrl.objectives.LossModule.forward` method expects +# as input a :class:`tensordict.TensorDict` instance sampled from an experience +# replay buffer, or any similar data structure. Using this format makes it +# possible to re-use the module across # modalities, or in complex settings where the model needs to read multiple -# entries for instance. +# entries for instance. In other words, it allows us to code a loss module that +# is oblivious to the data type that is being given to is and that focuses on +# running the elementary steps of the loss function and only those. # # To keep the tutorial as didactic as we can, we'll be displaying each method -# of the class independently and we'll be populating the class at a later stage. +# of the class independently and we'll be populating the class at a later +# stage. # -# Let us start with the :meth:`__init__` method. DDPG aims at a simple goal: +# Let us start with the :meth:`torchrl.objectives.LossModule.__init__` +# method. DDPG aims at solving a control task with a simple strategy: # training a policy to output actions that maximise the value predicted by # a value network. Hence, our loss module needs to receive two networks in its # constructor: an actor and a value networks. We expect both of these to be -# tensordict-compatible objects, such as :class:`tensordict.nn.TensorDictModule`. +# tensordict-compatible objects, such as +# :class:`tensordict.nn.TensorDictModule`. +# Our loss function will need to compute a target value and fit the value +# network to this, and generate an action and fit the policy such that its +# value estimate is maximised. # # The crucial step of the :meth:`LossModule.__init__` method is the call to -# :meth:`LossModule.convert_to_functional`. This method will extract the -# parameters from the module and convert it to a functional module. +# :meth:`torchrl.LossModule.convert_to_functional`. This method will extract +# the parameters from the module and convert it to a functional module. +# Strictly speaking, this is not necessary and one may perfectly code all +# the losses without it. However, we encourage its usage for the following +# reason. +# # The reason TorchRL does this is that RL algorithms often execute the same -# model with different sets of parameters, called "trainable" and "target" parameters. +# model with different sets of parameters, called "trainable" and "target" +# parameters. # The "trainable" parameters are those that the optimizer needs to fit. The # "target" parameters are usually a copy of the formers with some time lag -# (absolute or diluted through a moving average). These target parameters -# are used to compute the value associated with the next observation. -# One the advantages of using a set of target parameters for the value model -# that do not match exactly the current configuration is that they provide -# a pessimistic bound on the value function being computed. +# (absolute or diluted through a moving average). +# These target parameters are used to compute the value associated with the +# next observation. 
One the advantages of using a set of target parameters +# for the value model that do not match exactly the current configuration is +# that they provide a pessimistic bound on the value function being computed. # Pay attention to the ``create_target_params`` keyword argument below: this # argument tells the :meth:`torchrl.objectives.LossModule.convert_to_functional` # method to create a set of target parameters in the loss module to be used # for target value computation. If this is set to ``False`` (see the actor network # for instance) the ``target_actor_network_params`` attribute will still be -# accessible but this will just return a detached version of the actor parameters. +# accessible but this will just return a **detached** version of the +# actor parameters. # -# Later, we will see how the target parameters should be updated in TorchRL. -# -# We also incorporate an advantage module. This will be used to compute the -# next state value using our value network. We'll see later in this tutorial -# how various advantage modules can be used. If none is provided, we'll -# be using the TD(lambda) method, which is usually preferable to TD(0). -# Notice that this choice makes it necessary that the tensordict provided -# has its last dimension representing the time span of the experiment (ie -# our replay buffer must be populated using non-flatten data). +# Later, we will see how the target parameters should be updated in torchrl. # @@ -157,7 +191,6 @@ def _init( self, actor_network: TensorDictModule, value_network: TensorDictModule, - advantage="td(lambda)", ) -> None: super(type(self), self).__init__() @@ -178,24 +211,29 @@ def _init( # Since the value we'll be using is based on the actor and value network, # we put them together in a single actor-critic container. actor_critic = ActorCriticWrapper(actor_network, value_network) - if advantage == "td(lambda)": - advantage_module = TDLambdaEstimate( - gamma=0.99, - lmbda=0.95, - value_network=actor_critic, - value_key="state_action_value", - ) - elif advantage == "td(0)": - advantage_module = TDEstimate( - gamma=0.99, value_network=actor_critic, value_key="state_action_value" - ) - else: - raise NotImplementedError("advantage must be one of 'td(lambda)' or 'td(0)'.") - self.advantage = advantage - self.advantage_module = advantage_module - self.loss_funtion = "l2" +############################################################################### +# The value estimator loss method +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# In many RL algorithm, the value network (or Q-value network) is trained based +# on an empirical value estimate. This can be bootstrapped (TD(0), low +# variance, high bias), meaning +# that the target value is obtained using the next reward and nothing else, or +# a Monte-Carlo estimate can be obtained (TD(1)) in which case the whole +# sequence of upcoming rewards will be used (high variance, low bias). An +# intermediate estimator (TD(:math:`\lambda`)) can also be used to compromise +# bias and variance. +# TorchRL makes it easy to use one or the other estimator via the +# :class:`torchrl.objectives.utils.ValueEstimators` Enum class, which contains +# pointers to all the value estimators implemented. Let us define the default +# value function here. We will take the simplest version (TD(0)), and show later +# on how this can be changed. 
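Before moving to the estimator classes, the bootstrapped TD(0) target described above can be written down with plain tensors in a couple of lines; ``gamma``, ``reward``, ``next_done`` and ``next_value`` are stand-ins for the discount factor, the reward, the termination flag and the (target) value of the next state:

    import torch

    gamma = 0.99
    reward = torch.randn(10, 1)                      # r_t
    next_done = torch.zeros(10, 1, dtype=torch.bool)
    next_value = torch.randn(10, 1)                  # V(s_{t+1}) from the target network
    # TD(0): bootstrap from the next state only, zeroing out terminal states
    td0_target = reward + gamma * next_value * (~next_done).float()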
+ +from torchrl.objectives.utils import ValueEstimators + +default_value_estimator = ValueEstimators.TD0 ############################################################################### # The actor loss method @@ -321,6 +359,7 @@ def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: class DDPGLoss(LossModule): + default_value_estimator = default_value_estimator __init__ = _init forward = _forward loss_value = _loss_value @@ -563,7 +602,7 @@ def make_t_env(): # value network, trained to estimate the value of a state-action pair, and a # parametric actor that learns how to select actions that maximize this value. # -# Recall that building a torchrl module requires two steps: +# Recall that building a TorchRL module requires two steps: # # - writing the :class:`torch.nn.Module` that will be used as network, # - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the From 34469f2f45ab1b3ec47aeb4e51a46b7fbfa066a5 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 17:34:10 +0100 Subject: [PATCH 31/89] differentiable=True --- torchrl/objectives/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 250087a34a4..3daf5e70876 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -53,13 +53,13 @@ def default_value_kwargs(value_type: ValueEstimators): """ if value_type == ValueEstimators.TD1: - return {"gamma": 0.99} + return {"gamma": 0.99, "differentiable": True} elif value_type == ValueEstimators.TD0: - return {"gamma": 0.99} + return {"gamma": 0.99, "differentiable": True} elif value_type == ValueEstimators.GAE: - return {"gamma": 0.99, "lmbda": 0.95} + return {"gamma": 0.99, "lmbda": 0.95, "differentiable": True} elif value_type == ValueEstimators.TDLambda: - return {"gamma": 0.99, "lmbda": 0.95} + return {"gamma": 0.99, "lmbda": 0.95, "differentiable": True} else: raise NotImplementedError(f"Unknown value type {value_type}.") From aae2bbe6002dde8034d3378c4a344490978bdf58 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 17:37:20 +0100 Subject: [PATCH 32/89] differentiable=True --- torchrl/objectives/a2c.py | 2 +- torchrl/objectives/ppo.py | 6 +++--- torchrl/objectives/reinforce.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/torchrl/objectives/a2c.py b/torchrl/objectives/a2c.py index 4b7c40c56c2..2ac6ba7a4ec 100644 --- a/torchrl/objectives/a2c.py +++ b/torchrl/objectives/a2c.py @@ -155,7 +155,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: if advantage is None: self.value_estimator( tensordict, - params=self.critic_params, + params=self.critic_params.detach(), target_params=self.target_critic_params, ) advantage = tensordict.get(self.advantage_key) diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py index 638174d21a6..2b4e115d35b 100644 --- a/torchrl/objectives/ppo.py +++ b/torchrl/objectives/ppo.py @@ -192,7 +192,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: if advantage is None: self.value_estimator( tensordict, - params=self.critic_params, + params=self.critic_params.detach(), target_params=self.target_critic_params, ) advantage = tensordict.get(self.advantage_key) @@ -342,7 +342,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: if advantage is None: self.value_estimator( tensordict, - params=self.critic_params, + params=self.critic_params.detach(), target_params=self.target_critic_params, ) advantage = 
tensordict.get(self.advantage_key) @@ -509,7 +509,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDict: if advantage is None: self.value_estimator( tensordict, - params=self.critic_params, + params=self.critic_params.detach(), target_params=self.target_critic_params, ) advantage = tensordict.get(self.advantage_key) diff --git a/torchrl/objectives/reinforce.py b/torchrl/objectives/reinforce.py index baa0a4c2ae8..21f8e3c40db 100644 --- a/torchrl/objectives/reinforce.py +++ b/torchrl/objectives/reinforce.py @@ -110,7 +110,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase: if advantage is None: self.value_estimator( tensordict, - params=self.critic_params, + params=self.critic_params.detach(), target_params=self.target_critic_params, ) advantage = tensordict.get(self.advantage_key) From c9c106baf162f1f48518d1046dfeb5eeca4a4d16 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 21:28:10 +0100 Subject: [PATCH 33/89] amend --- torchrl/data/__init__.py | 2 +- torchrl/trainers/trainers.py | 44 +- tutorials/sphinx-tutorials/coding_ddpg.py | 82 ++- tutorials/sphinx-tutorials/coding_dqn.py | 703 ++++------------------ 4 files changed, 218 insertions(+), 613 deletions(-) diff --git a/torchrl/data/__init__.py b/torchrl/data/__init__.py index 6608b49cade..fa26ce0c6a9 100644 --- a/torchrl/data/__init__.py +++ b/torchrl/data/__init__.py @@ -3,6 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from . import datasets from .postprocs import MultiStep from .replay_buffers import ( LazyMemmapStorage, @@ -30,4 +31,3 @@ UnboundedContinuousTensorSpec, UnboundedDiscreteTensorSpec, ) -from . import datasets diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 4f040696271..1608f853ad4 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -601,8 +601,10 @@ class ReplayBufferTrainer(TrainerHookBase): Args: replay_buffer (TensorDictReplayBuffer): replay buffer to be used. - batch_size (int): batch size when sampling data from the - latest collection or from the replay buffer. + batch_size (int, optional): batch size when sampling data from the + latest collection or from the replay buffer. If none is provided, + the replay buffer batch-size will be used (preferred option for + unchanged batch-sizes). memmap (bool, optional): if ``True``, a memmap tensordict is created. Default is False. device (device, optional): device where the samples must be placed. @@ -630,7 +632,7 @@ class ReplayBufferTrainer(TrainerHookBase): def __init__( self, replay_buffer: TensorDictReplayBuffer, - batch_size: int, + batch_size: Optional[int] = None, memmap: bool = False, device: DEVICE_TYPING = "cpu", flatten_tensordicts: bool = True, @@ -640,6 +642,12 @@ def __init__( self.batch_size = batch_size self.memmap = memmap self.device = device + if flatten_tensordicts: + warnings.warn( + "flatten_tensordicts default value will soon be changed " + "to False for a faster execution. Make sure your " + "code is robust to this change." 
+ ) self.flatten_tensordicts = flatten_tensordicts self.max_dims = max_dims @@ -668,7 +676,7 @@ def extend(self, batch: TensorDictBase) -> TensorDictBase: self.replay_buffer.extend(batch) def sample(self, batch: TensorDictBase) -> TensorDictBase: - sample = self.replay_buffer.sample(self.batch_size) + sample = self.replay_buffer.sample(batch_size=self.batch_size) return sample.to(self.device, non_blocking=True) def update_priority(self, batch: TensorDictBase) -> None: @@ -1094,7 +1102,7 @@ def register(self, trainer: Trainer, name: str = "batch_subsampler"): class Recorder(TrainerHookBase): - """Recorder hook for Trainer. + """Recorder hook for :class:`torchrl.trainers.Trainer`. Args: record_interval (int): total number of optimisation steps @@ -1118,33 +1126,45 @@ class Recorder(TrainerHookBase): the performance of the policy, it should be possible to turn off the explorative behaviour by calling the `set_exploration_mode('mode')` context manager. - recorder (EnvBase): An environment instance to be used + environment (EnvBase): An environment instance to be used for testing. exploration_mode (str, optional): exploration mode to use for the policy. By default, no exploration is used and the value used is "mode". Set to "random" to enable exploration - out_key (str, optional): reward key to set to the logger. Default is - `"reward_evaluation"`. + log_keys (sequence of str or tuples or str, optional): keys to read in the tensordict + for logging. Defaults to ``[("next", "reward")]``. + out_keys (Dict[str, str], optional): a dictionary mapping the ``log_keys`` + to their name in the logs. Defaults to ``{("next", "reward"): "r_evaluation"}``. suffix (str, optional): suffix of the video to be recorded. log_pbar (bool, optional): if ``True``, the reward value will be logged on the progression bar. Default is `False`. """ + ENV_DEPREC = ( + "the environment should be passed under the 'environment' key" + " and not the 'recorder' key." 
+ ) + def __init__( self, + *, record_interval: int, record_frames: int, frame_skip: int, policy_exploration: TensorDictModule, - recorder: EnvBase, + environment: EnvBase = None, exploration_mode: str = "random", - log_keys: Optional[List[str]] = None, - out_keys: Optional[Dict[str, str]] = None, + log_keys: Optional[List[Union[str, Tuple[str]]]] = None, + out_keys: Optional[Dict[Union[str, Tuple[str]], str]] = None, suffix: Optional[str] = None, log_pbar: bool = False, + recorder: EnvBase = None, ) -> None: - + if environment is None and recorder is not None: + warnings.warn(self.ENV_DEPREC) + elif environment is not None and recorder is not None: + raise ValueError("environment and recorder conflict.") self.policy_exploration = policy_exploration self.recorder = recorder self.record_frames = record_frames diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index b39596914e7..0d1c353d472 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -56,7 +56,6 @@ from typing import Tuple from torchrl.objectives import LossModule -from torchrl.objectives.value import TDEstimate, TDLambdaEstimate warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore @@ -92,11 +91,7 @@ OrnsteinUhlenbeckProcessWrapper, ValueOperator, ) -from torchrl.objectives.utils import ( - distance_loss, - hold_out_params, - SoftUpdate, -) +from torchrl.objectives.utils import distance_loss, SoftUpdate from torchrl.trainers import Recorder ############################################################################### @@ -211,8 +206,10 @@ def _init( # Since the value we'll be using is based on the actor and value network, # we put them together in a single actor-critic container. actor_critic = ActorCriticWrapper(actor_network, value_network) + self.actor_critic = actor_critic self.loss_funtion = "l2" + ############################################################################### # The value estimator loss method # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -236,6 +233,45 @@ def _init( default_value_estimator = ValueEstimators.TD0 ############################################################################### +# We also need to give some instructions to DDPG on how to build the value +# estimator, depending on the user query. Depending on the estimator provided, +# we will build the corresponding module to be used at train time: + +from torchrl.objectives.utils import default_value_kwargs +from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator + + +def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma + hp.update(hyperparams) + value_key = "state_action_value" + if value_type == ValueEstimators.TD1: + self._value_estimator = TD1Estimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueEstimators.TD0: + self._value_estimator = TD0Estimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueEstimators.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." 
+ ) + elif value_type == ValueEstimators.TDLambda: + self._value_estimator = TDLambdaEstimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") + + +############################################################################### +# The ``make_value_estimator`` method can but does not need to be called: if +# not, the :class:`torchrl.objectives.LossModule` will query this method with +# its default estimator. +# # The actor loss method # ~~~~~~~~~~~~~~~~~~~~~ # @@ -249,6 +285,8 @@ def _init( # For this, the :func:`torchrl.objectives.utils.hold_out_params` function # can be used. +from torchrl.objectives.utils import hold_out_params + def _loss_actor( self, @@ -274,8 +312,8 @@ def _loss_actor( # ~~~~~~~~~~~~~~~~~~~~~ # # We now need to optimize our value network parameters. -# To do this, we will rely on the advantage module provided during -# the loss construction. +# To do this, we will rely on the value estimator of our class: +# def _loss_value( @@ -284,18 +322,12 @@ def _loss_value( ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: td_copy = tensordict.detach() + # V(s, a) + self.value_network(td_copy, params=self.value_network_params) + pred_val = td_copy.get("state_action_value").squeeze(-1) + # we manually reconstruct the parameters of the actor-critic, where the first # set of parameters belongs to the actor and the second to the value function. - params = TensorDict( - { - "module": { - "0": self.actor_network_params.detach(), - "1": self.value_network_params, - } - }, - batch_size=self.target_actor_network_params.batch_size, - device=self.target_actor_network_params.device, - ) target_params = TensorDict( { "module": { @@ -306,14 +338,16 @@ def _loss_value( batch_size=self.target_actor_network_params.batch_size, device=self.target_actor_network_params.device, ) - with set_exploration_mode("mode"): - self.advantage_module(td_copy, params=params, target_params=target_params) - target_value = td_copy.get(self.advantage_module.value_target_key) - pred_val = td_copy.get("state_action_value") + with set_exploration_mode("mode"): # we make sure that no exploration is performed + target_value = self.value_estimator.value_estimate( + tensordict, target_params=target_params + ).squeeze(-1) + # td_error = pred_val - target_value loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) + td_error = (pred_val - target_value).pow(2) - return loss_value, (pred_val - target_value).pow(2), pred_val, target_value + return loss_value, td_error, pred_val, target_value ############################################################################### @@ -874,6 +908,8 @@ def make_replay_buffer(buffer_size, prefetch=3): # updater. # loss_module = DDPGLoss(actor, qnet) +# let's use the TD(lambda) estimator! +loss_module.make_value_estimator(ValueEstimators.TDLambda) target_net_updater = SoftUpdate(loss_module, eps=0.98) target_net_updater.init_() diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 4124d87a492..9313e6e8b08 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -26,7 +26,8 @@ # The trainer is fully customisable and offers a large set of functionalities. # The tutorial is organised around its construction. # We will be detailing how to build each of the components of the library first, -# and then put the pieces together using the `torchrl.trainers.Trainer` class. 
+# and then put the pieces together using the :class:`torchrl.trainers.Trainer` +# class. # # Along the road, we will also focus on some other aspects of the library: # @@ -86,7 +87,8 @@ import warnings from torchrl.objectives import DQNLoss, SoftUpdate -from torchrl.trainers import Trainer, ReplayBufferTrainer, UpdateWeights +from torchrl.record.loggers.csv import CSVLogger +from torchrl.trainers import Recorder, ReplayBufferTrainer, Trainer, UpdateWeights warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore @@ -177,7 +179,10 @@ def is_notebook() -> bool: # -def make_env(parallel=False, obs_norm_sd=None, ): +def make_env( + parallel=False, + obs_norm_sd=None, +): if obs_norm_sd is None: obs_norm_sd = {"standard_normal": True} if parallel: @@ -185,13 +190,19 @@ def make_env(parallel=False, obs_norm_sd=None, ): num_workers, EnvCreator( lambda: GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device, + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, ) ), ) else: base_env = GymEnv( - "CartPole-v1", from_pixels=True, pixels_only=True, device=device, + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, ) env = TransformedEnv( @@ -222,17 +233,19 @@ def make_env(parallel=False, obs_norm_sd=None, ): # not all dimensions disappear in the process: # + def get_norm_stats(): test_env = make_env() test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) -) + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) + ) obs_norm_sd = test_env.transform[-1].state_dict() # let's check that normalizing constants have a size of ``[C, 1, 1]`` where # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). print(obs_norm_sd) return obs_norm_sd + ############################################################################### # Building the model (Deep Q-network) # ----------------------------------- @@ -255,6 +268,7 @@ def get_norm_stats(): # in the input :class:`tensordict.TensorDict`. # + def make_model(dummy_env): cnn_kwargs = { "num_cells": [32, 64, 64], @@ -317,13 +331,16 @@ def make_model(dummy_env): # The only requirement of this storage is that the data passed to it at write # time must always have the same shape. -def get_replay_buffer(buffer_size, n_optim): + +def get_replay_buffer(buffer_size, n_optim, batch_size): replay_buffer = TensorDictReplayBuffer( + batch_size=batch_size, storage=LazyMemmapStorage(buffer_size), prefetch=n_optim, ) return replay_buffer + ############################################################################### # Data collector # ~~~~~~~~~~~~~~ @@ -354,12 +371,13 @@ def get_replay_buffer(buffer_size, n_optim): # out training loop must account for. For simplicity, we set the devices to # the same value for all sub-collectors. 
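Outside of the :class:`torchrl.trainers.Trainer` used further down, such a collector is usually consumed as a plain iterable; a rough sketch of that pattern, assuming ``collector`` and ``replay_buffer`` are built as in this tutorial:

    for batch in collector:
        replay_buffer.extend(batch.cpu())    # store the collected transitions
        # ... sample from the buffer and run a few optimization steps here ...
        collector.update_policy_weights_()   # sync the workers with the latest policy
    collector.shutdown()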
-def get_collector(obs_norm_sd, num_collectors, actor_explore, frames_per_batch, total_frames, device): + +def get_collector( + obs_norm_sd, num_collectors, actor_explore, frames_per_batch, total_frames, device +): data_collector = MultiaSyncDataCollector( [ - make_env( - parallel=True, obs_norm_sd=obs_norm_sd - ), + make_env(parallel=True, obs_norm_sd=obs_norm_sd), ] * num_collectors, policy=actor_explore, @@ -375,6 +393,7 @@ def get_collector(obs_norm_sd, num_collectors, actor_explore, frames_per_batch, ) return data_collector + ############################################################################### # Loss function # ------------- @@ -394,11 +413,13 @@ def get_collector(obs_norm_sd, num_collectors, actor_explore, frames_per_batch, # in similar algorithms. # + def get_loss_module(actor, gamma): loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) target_updater = SoftUpdate(loss_module) return loss_module, target_updater + ############################################################################### # Hyperparameters # --------------- @@ -475,7 +496,6 @@ def get_loss_module(actor, gamma): num_workers = 2 # 8 num_collectors = 2 # 4 - ############################################################################### # Environment and exploration # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -524,582 +544,111 @@ def get_loss_module(actor, gamma): # Get model actor, actor_explore = make_model(test_env) loss_module, target_net_updater = get_loss_module(actor, gamma) -collector = get_collector(stats, num_collectors, actor_explore, frames_per_batch, total_frames, device) -optimizer = torch.optim.Adam(loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas) +collector = get_collector( + stats, num_collectors, actor_explore, frames_per_batch, total_frames, device +) +optimizer = torch.optim.Adam( + loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas +) +logger = CSVLogger(exp_name="dqn_exp", log_dir="./") + trainer = Trainer( - collector=collector, + collector=collector, total_frames=total_frames, frame_skip=1, loss_module=loss_module, optimizer=optimizer, - logger=None, - optim_steps_per_batch = n_optim, + logger=logger, + optim_steps_per_batch=n_optim, ) -buffer_hook = ReplayBufferTrainer(get_replay_buffer(buffer_size, n_optim)) +############################################################################### +# Registering hooks +# ~~~~~~~~~~~~~~~~~ +# +# Registering hooks can be achieved in two separate ways: +# +# - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` +# method is the first choice. One just needs to provide the trainer as input +# and the hook will be registered with a default name at a default location. +# For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` +# requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which +# can be cumbersome to implement. 
+buffer_hook = ReplayBufferTrainer( + get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), + flatten_tensordicts=False, +) buffer_hook.register(trainer) weight_updater = UpdateWeights(collector, update_weights_interval=1) weight_updater.register(trainer) +recorder = Recorder( + record_interval=100, # log every 100 optimization steps + record_frames=10_000, # maximum number of frames in the record + frame_skip=1, + policy_exploration=actor_explore, + environment=test_env, + exploration_mode="mode", + log_keys=[("next", "reward")], + out_keys={("next", "reward"): "rewards"}, +) +recorder.register(trainer) +############################################################################### +# - Any callable (including :class:`torchrl.trainers.TrainerHookBase` +# subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. +# In this case, a location must be explicitely passed (). This method gives +# more control over the location of the hook but it also requires more +# understanding of the Trainer mechanism. +# Check the `trainer documentation `_ +# for a detailed description of the trainer hooks. +# trainer.register_op("post_optim", target_net_updater.step) +############################################################################### +# Here we are, ready to train our algorithm! A simple call to +# ``trainer.train()`` and we'll be getting our results logged in. +# trainer.train() -# ############################################################################### -# # We represent the parameters and targets as flat structures, but unflattening -# # them is quite easy: -# -# params_flat = params.flatten_keys(".") -# -# ############################################################################### -# # We will be using the adam optimizer: -# -# optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) -# -# ############################################################################### -# # We create a test environment for evaluation of the policy: -# -# test_env = make_env( -# parallel=False, obs_norm_sd=obs_norm_sd -# ) -# # sanity check: -# print(actor_explore(test_env.reset())) -# -# ############################################################################### -# # Training loop of a regular DQN -# # ------------------------------ -# # -# # We'll start with a simple implementation of DQN where the returns are -# # computed without bootstrapping, i.e. -# # -# # .. math:: -# # -# # Q_{t}(s, a) = R(s, a) + \gamma * V_{t+1}(s) -# # -# # where :math:`Q(s, a)` is the Q-value of the current state-action pair, -# # :math:`R(s, a)` is the result of the reward function, and :math:`V(s)` is a -# # value function that returns 0 for terminating states. -# # -# # We store the logs in a defaultdict: -# -# logs_exp1 = defaultdict(list) -# prev_traj_count = 0 -# -# pbar = tqdm.tqdm(total=total_frames) -# for j, data in enumerate(data_collector): -# current_frames = data.numel() -# pbar.update(current_frames) -# data = data.view(-1) -# -# # We store the values on the replay buffer, after placing them on CPU. -# # When called for the first time, this will instantiate our storage -# # object which will print its content. 
-# replay_buffer.extend(data.cpu()) -# -# # some logging -# if len(logs_exp1["frames"]): -# logs_exp1["frames"].append(current_frames + logs_exp1["frames"][-1]) -# else: -# logs_exp1["frames"].append(current_frames) -# -# if data["next", "done"].any(): -# done = data["next", "done"].squeeze(-1) -# logs_exp1["traj_lengths"].append( -# data["next", "step_count"][done].float().mean().item() -# ) -# -# # check that we have enough data to start training -# if sum(logs_exp1["frames"]) > init_random_frames: -# for _ in range(n_optim): -# # sample from the RB and send to device -# sampled_data = replay_buffer.sample(batch_size) -# sampled_data = sampled_data.to(device, non_blocking=True) -# -# # collect data from RB -# reward = sampled_data["next", "reward"].squeeze(-1) -# done = sampled_data["next", "done"].squeeze(-1).to(reward.dtype) -# action = sampled_data["action"].clone() -# -# # Compute action value (of the action actually taken) at time t -# # By default, TorchRL uses one-hot encodings for discrete actions -# sampled_data_out = sampled_data.select(*actor.in_keys) -# sampled_data_out = factor(sampled_data_out, params=params) -# action_value = sampled_data_out["action_value"] -# action_value = (action_value * action.to(action_value.dtype)).sum(-1) -# with torch.no_grad(): -# # compute best action value for the next step, using target parameters -# tdstep = step_mdp(sampled_data) -# next_value = factor( -# tdstep.select(*actor.in_keys), -# params=params_target, -# )["chosen_action_value"].squeeze(-1) -# exp_value = reward + gamma * next_value * (1 - done) -# assert exp_value.shape == action_value.shape -# # we use MSE loss but L1 or smooth L1 should also work -# error = nn.functional.mse_loss(exp_value, action_value).mean() -# error.backward() -# -# gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) -# -# optim.step() -# optim.zero_grad() -# -# # update of the target parameters -# params_target.apply( -# lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), -# params.detach(), -# inplace=True, -# ) -# -# actor_explore.step(current_frames) -# -# # Logging -# logs_exp1["grad_vals"].append(float(gv)) -# logs_exp1["losses"].append(error.item()) -# logs_exp1["values"].append(action_value.mean().item()) -# logs_exp1["traj_count"].append( -# prev_traj_count + data["next", "done"].sum().item() -# ) -# prev_traj_count = logs_exp1["traj_count"][-1] -# -# if j % 10 == 0: -# with set_exploration_mode("mode"), torch.no_grad(): -# # execute a rollout. 
The `set_exploration_mode("mode")` has no effect here since the policy is deterministic, but we add it for completeness -# eval_rollout = test_env.rollout( -# max_steps=10000, -# policy=actor, -# ).cpu() -# logs_exp1["traj_lengths_eval"].append(eval_rollout.shape[-1]) -# logs_exp1["evals"].append(eval_rollout["next", "reward"].sum().item()) -# if len(logs_exp1["mavgs"]): -# logs_exp1["mavgs"].append( -# logs_exp1["evals"][-1] * 0.05 + logs_exp1["mavgs"][-1] * 0.95 -# ) -# else: -# logs_exp1["mavgs"].append(logs_exp1["evals"][-1]) -# logs_exp1["traj_count_eval"].append(logs_exp1["traj_count"][-1]) -# pbar.set_description( -# f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp1['evals'][-1]: 4.4f}" -# ) -# -# # update policy weights -# data_collector.update_policy_weights_() -# -# ############################################################################### -# # We write a custom plot function to display the performance of our algorithm -# # -# -# -# def plot(logs, name): -# plt.figure(figsize=(15, 10)) -# plt.subplot(2, 3, 1) -# plt.plot( -# logs["frames"][-len(logs["evals"]) :], -# logs["evals"], -# label="return (eval)", -# ) -# plt.plot( -# logs["frames"][-len(logs["mavgs"]) :], -# logs["mavgs"], -# label="mavg of returns (eval)", -# ) -# plt.xlabel("frames collected") -# plt.ylabel("trajectory length (= return)") -# plt.subplot(2, 3, 2) -# plt.plot( -# logs["traj_count"][-len(logs["evals"]) :], -# logs["evals"], -# label="return", -# ) -# plt.plot( -# logs["traj_count"][-len(logs["mavgs"]) :], -# logs["mavgs"], -# label="mavg", -# ) -# plt.xlabel("trajectories collected") -# plt.legend() -# plt.subplot(2, 3, 3) -# plt.plot(logs["frames"][-len(logs["losses"]) :], logs["losses"]) -# plt.xlabel("frames collected") -# plt.title("loss") -# plt.subplot(2, 3, 4) -# plt.plot(logs["frames"][-len(logs["values"]) :], logs["values"]) -# plt.xlabel("frames collected") -# plt.title("value") -# plt.subplot(2, 3, 5) -# plt.plot( -# logs["frames"][-len(logs["grad_vals"]) :], -# logs["grad_vals"], -# ) -# plt.xlabel("frames collected") -# plt.title("grad norm") -# if len(logs["traj_lengths"]): -# plt.subplot(2, 3, 6) -# plt.plot(logs["traj_lengths"]) -# plt.xlabel("batches") -# plt.title("traj length (training)") -# plt.savefig(name) -# if is_notebook(): -# plt.show() -# -# -# ############################################################################### -# # The performance of the policy can be measured as the length of trajectories. -# # As we can see on the results of the :func:`plot` function, the performance -# # of the policy increases, albeit slowly. -# # -# # .. code-block:: python -# # -# # plot(logs_exp1, "dqn_td0.png") -# # -# # .. figure:: /_static/img/dqn_td0.png -# # :alt: Cart Pole results with TD(0) -# # -# -# print("shutting down") -# data_collector.shutdown() -# del data_collector -# -# ############################################################################### -# # DQN with TD(:math:`\lambda`) -# # ---------------------------- -# # -# # We can improve the above algorithm by getting a better estimate of the -# # return, using not only the next state value but the whole sequence of rewards -# # and values that follow a particular step. -# # -# # TorchRL provides a vectorized version of TD(lambda) named -# # :func:`torchrl.objectives.value.functional.vec_td_lambda_advantage_estimate`. -# # We'll use this to obtain a target value that the value network will be -# # trained to match. 
-# # -# # The big difference in this implementation is that we'll store entire -# # trajectories and not single steps in the replay buffer. This will be done -# # automatically as long as we're not "flattening" the tensordict collected: -# # by keeping a shape ``[Batch x timesteps]`` and giving this -# # to the RB, we'll be creating a replay buffer of size -# # ``[Capacity x timesteps]``. -# -# -# from torchrl.objectives.value.functional import vec_td_lambda_advantage_estimate -# -# ############################################################################### -# # We reset the actor parameters: -# # -# -# ( -# factor, -# actor, -# actor_explore, -# params, -# params_target, -# ) = make_model(test_env) -# params_flat = params.flatten_keys(".") -# -# optim = torch.optim.Adam(list(params_flat.values()), lr, betas=betas) -# test_env = make_env( -# parallel=False, obs_norm_sd=obs_norm_sd -# ) -# print(actor_explore(test_env.reset())) -# -# ############################################################################### -# # Data: Replay buffer and collector -# # --------------------------------- -# # -# # We need to build a new replay buffer of the appropriate size: -# # -# -# max_size = frames_per_batch // num_workers -# -# replay_buffer = TensorDictReplayBuffer( -# storage=LazyMemmapStorage(-(-buffer_size // max_size)), -# prefetch=n_optim, -# ) -# -# data_collector = MultiaSyncDataCollector( -# [ -# make_env( -# parallel=True, obs_norm_sd=obs_norm_sd -# ), -# ] -# * num_collectors, -# policy=actor_explore, -# frames_per_batch=frames_per_batch, -# total_frames=total_frames, -# exploration_mode="random", -# devices=[device] * num_collectors, -# storing_devices=[device] * num_collectors, -# # devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], -# # storing_devices=[f"cuda:{i}" for i in range(1, 1 + num_collectors)], -# split_trajs=False, -# ) -# -# -# logs_exp2 = defaultdict(list) -# prev_traj_count = 0 -# -# ############################################################################### -# # Training loop -# # ------------- -# # -# # There are very few differences with the training loop above: -# # -# # - The tensordict received by the collector is used as-is, without being -# # flattened (recall the ``data.view(-1)`` above), to keep the temporal -# # relation between consecutive steps. -# # - We use :func:`vec_td_lambda_advantage_estimate` to compute the target -# # value. 
-# -# pbar = tqdm.tqdm(total=total_frames) -# for j, data in enumerate(data_collector): -# current_frames = data.numel() -# pbar.update(current_frames) -# -# replay_buffer.extend(data.cpu()) -# if len(logs_exp2["frames"]): -# logs_exp2["frames"].append(current_frames + logs_exp2["frames"][-1]) -# else: -# logs_exp2["frames"].append(current_frames) -# -# if data["next", "done"].any(): -# done = data["next", "done"].squeeze(-1) -# logs_exp2["traj_lengths"].append( -# data["next", "step_count"][done].float().mean().item() -# ) -# -# if sum(logs_exp2["frames"]) > init_random_frames: -# for _ in range(n_optim): -# sampled_data = replay_buffer.sample(batch_size // max_size) -# sampled_data = sampled_data.clone().to(device, non_blocking=True) -# -# reward = sampled_data["next", "reward"] -# done = sampled_data["next", "done"].to(reward.dtype) -# action = sampled_data["action"].clone() -# -# sampled_data_out = sampled_data.select(*actor.in_keys) -# sampled_data_out = vmap(factor, (0, None))(sampled_data_out, params) -# action_value = sampled_data_out["action_value"] -# action_value = (action_value * action.to(action_value.dtype)).sum(-1, True) -# with torch.no_grad(): -# tdstep = step_mdp(sampled_data) -# next_value = vmap(factor, (0, None))( -# tdstep.select(*actor.in_keys), params -# ) -# next_value = next_value["chosen_action_value"] -# error = vec_td_lambda_advantage_estimate( -# gamma, -# lmbda, -# action_value, -# next_value, -# reward, -# done, -# ).pow(2) -# error = error.mean() -# error.backward() -# -# gv = nn.utils.clip_grad_norm_(list(params_flat.values()), 1) -# -# optim.step() -# optim.zero_grad() -# -# # update of the target parameters -# params_target.apply( -# lambda p_target, p_orig: p_orig * tau + p_target * (1 - tau), -# params.detach(), -# inplace=True, -# ) -# -# actor_explore.step(current_frames) -# -# # Logging -# logs_exp2["grad_vals"].append(float(gv)) -# -# logs_exp2["losses"].append(error.item()) -# logs_exp2["values"].append(action_value.mean().item()) -# logs_exp2["traj_count"].append( -# prev_traj_count + data["next", "done"].sum().item() -# ) -# prev_traj_count = logs_exp2["traj_count"][-1] -# if j % 10 == 0: -# with set_exploration_mode("mode"), torch.no_grad(): -# # execute a rollout. The `set_exploration_mode("mode")` has -# # no effect here since the policy is deterministic, but we add -# # it for completeness -# eval_rollout = test_env.rollout( -# max_steps=10000, -# policy=actor, -# ).cpu() -# logs_exp2["traj_lengths_eval"].append(eval_rollout.shape[-1]) -# logs_exp2["evals"].append(eval_rollout["next", "reward"].sum().item()) -# if len(logs_exp2["mavgs"]): -# logs_exp2["mavgs"].append( -# logs_exp2["evals"][-1] * 0.05 + logs_exp2["mavgs"][-1] * 0.95 -# ) -# else: -# logs_exp2["mavgs"].append(logs_exp2["evals"][-1]) -# logs_exp2["traj_count_eval"].append(logs_exp2["traj_count"][-1]) -# pbar.set_description( -# f"error: {error: 4.4f}, value: {action_value.mean(): 4.4f}, test return: {logs_exp2['evals'][-1]: 4.4f}" -# ) -# -# # update policy weights -# data_collector.update_policy_weights_() -# -# -# ############################################################################### -# # TD(:math:`\lambda`) performs significantly better than TD(0) because it -# # retrieves a much less biased estimate of the state-action value. -# # -# # .. code-block:: python -# # -# # plot(logs_exp2, "dqn_tdlambda.png") -# # -# # .. 
figure:: /_static/img/dqn_tdlambda.png -# # :alt: Cart Pole results with TD(lambda) -# # -# -# -# print("shutting down") -# data_collector.shutdown() -# del data_collector -# -# ############################################################################### -# # Let's compare the results on a single plot. Because the TD(lambda) version -# # works better, we'll have fewer episodes collected for a given number of -# # frames (as there are more frames per episode). -# # -# # **Note**: As already mentioned above, to get a more reasonable performance, -# # use a greater value for ``total_frames`` e.g. 500000. -# -# -# def plot_both(): -# frames_td0 = logs_exp1["frames"] -# frames_tdlambda = logs_exp2["frames"] -# evals_td0 = logs_exp1["evals"] -# evals_tdlambda = logs_exp2["evals"] -# mavgs_td0 = logs_exp1["mavgs"] -# mavgs_tdlambda = logs_exp2["mavgs"] -# traj_count_td0 = logs_exp1["traj_count_eval"] -# traj_count_tdlambda = logs_exp2["traj_count_eval"] -# -# plt.figure(figsize=(15, 10)) -# plt.subplot(1, 2, 1) -# plt.plot(frames_td0[-len(evals_td0) :], evals_td0, label="return (td0)", alpha=0.5) -# plt.plot( -# frames_tdlambda[-len(evals_tdlambda) :], -# evals_tdlambda, -# label="return (td(lambda))", -# alpha=0.5, -# ) -# plt.plot(frames_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") -# plt.plot( -# frames_tdlambda[-len(mavgs_tdlambda) :], -# mavgs_tdlambda, -# label="mavg (td(lambda))", -# ) -# plt.xlabel("frames collected") -# plt.ylabel("trajectory length (= return)") -# -# plt.subplot(1, 2, 2) -# plt.plot( -# traj_count_td0[-len(evals_td0) :], -# evals_td0, -# label="return (td0)", -# alpha=0.5, -# ) -# plt.plot( -# traj_count_tdlambda[-len(evals_tdlambda) :], -# evals_tdlambda, -# label="return (td(lambda))", -# alpha=0.5, -# ) -# plt.plot(traj_count_td0[-len(mavgs_td0) :], mavgs_td0, label="mavg (td0)") -# plt.plot( -# traj_count_tdlambda[-len(mavgs_tdlambda) :], -# mavgs_tdlambda, -# label="mavg (td(lambda))", -# ) -# plt.xlabel("trajectories collected") -# plt.legend() -# -# plt.savefig("dqn.png") -# -# -# ############################################################################### -# # .. code-block:: python -# # -# # plot_both() -# # -# # .. figure:: /_static/img/dqn.png -# # :alt: Cart Pole results from the TD(:math:`lambda`) trained policy. -# # -# # Finally, we generate a new video to check what the algorithm has learnt. -# # If all goes well, the duration should be significantly longer than with a -# # random rollout. -# # -# # To get the raw pixels of the rollout, we insert a -# # :class:`torchrl.envs.CatTensors` transform that precedes all others and copies -# # the ``"pixels"`` key onto a ``"pixels_save"`` key. This is necessary because -# # the other transforms that modify this key will update its value in-place in -# # the output tensordict. -# # -# -# test_env.transform.insert(0, CatTensors(["pixels"], "pixels_save", del_keys=False)) -# eval_rollout = test_env.rollout(max_steps=10000, policy=actor, auto_reset=True).cpu() -# -# # sphinx_gallery_start_ignore -# import imageio -# -# imageio.mimwrite("cartpole.gif", eval_rollout["pixels_save"].numpy(), fps=30) -# # sphinx_gallery_end_ignore -# -# del test_env -# -# ############################################################################### -# # The video of the rollout can be saved using the imageio package: -# # -# # .. code-block:: -# # -# # import imageio -# # imageio.mimwrite('cartpole.mp4', eval_rollout["pixels_save"].numpy(), fps=30); -# # -# # .. 
figure:: /_static/img/cartpole.gif -# # :alt: Cart Pole results from the TD(:math:`\lambda`) trained policy. -# -# ############################################################################### -# # Conclusion and possible improvements -# # ------------------------------------ -# # -# # In this tutorial we have learnt: -# # -# # - How to train a policy that read pixel-based states, what transforms to -# # include and how to normalize the data; -# # - How to create a policy that picks up the action with the highest value -# # with :class:`torchrl.modules.QValueNetwork`; -# # - How to build a multiprocessed data collector; -# # - How to train a DQN with TD(:math:`\lambda`) returns. -# # -# # We have seen that using TD(:math:`\lambda`) greatly improved the performance -# # of DQN. Other possible improvements could include: -# # -# # - Using the Multi-Step post-processing. Multi-step will project an action -# # to the nth following step, and create a discounted sum of the rewards in -# # between. This trick can make the algorithm noticebly less myopic. To use -# # this, simply create the collector with -# # -# # from torchrl.data.postprocs.postprocs import MultiStep -# # collector = CollectorClass(..., postproc=MultiStep(gamma, n)) -# # -# # where ``n`` is the number of looking-forward steps. Pay attention to the -# # fact that the ``gamma`` factor has to be corrected by the number of -# # steps till the next observation when being passed to -# # ``vec_td_lambda_advantage_estimate``: -# # -# # gamma = gamma ** tensordict["steps_to_next_obs"] -# # - A prioritized replay buffer could also be used. This will give a -# # higher priority to samples that have the worst value accuracy. -# # - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` -# # for more information). -# # - More fancy exploration techniques, such as NoisyLinear layers and such -# # (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the -# # ``MLP`` class used in our Dueling DQN). +############################################################################### +# We can now quickly check the CSVs with the results. + +# TODO + +############################################################################### +# Conclusion and possible improvements +# ------------------------------------ +# +# In this tutorial we have learnt: +# +# - How to train a policy that read pixel-based states, what transforms to +# include and how to normalize the data; +# - How to create a policy that picks up the action with the highest value +# with :class:`torchrl.modules.QValueNetwork`; +# - How to build a multiprocessed data collector; +# - How to train a DQN with TD(:math:`\lambda`) returns. +# +# We have seen that using TD(:math:`\lambda`) greatly improved the performance +# of DQN. Other possible improvements could include: +# +# - Using the Multi-Step post-processing. Multi-step will project an action +# to the nth following step, and create a discounted sum of the rewards in +# between. This trick can make the algorithm noticebly less myopic. To use +# this, simply create the collector with +# +# from torchrl.data.postprocs.postprocs import MultiStep +# collector = CollectorClass(..., postproc=MultiStep(gamma, n)) +# +# where ``n`` is the number of looking-forward steps. 
Pay attention to the +# fact that the ``gamma`` factor has to be corrected by the number of +# steps till the next observation when being passed to +# ``vec_td_lambda_advantage_estimate``: +# +# gamma = gamma ** tensordict["steps_to_next_obs"] +# - A prioritized replay buffer could also be used. This will give a +# higher priority to samples that have the worst value accuracy. +# - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` +# for more information). +# - More fancy exploration techniques, such as NoisyLinear layers and such +# (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the +# ``MLP`` class used in our Dueling DQN). From c957916cfcfe431e2edcf6c605bd57d2532a868d Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 28 Mar 2023 21:33:34 +0100 Subject: [PATCH 34/89] fix trainer --- test/test_trainer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/test_trainer.py b/test/test_trainer.py index 1251d4edd48..8544efb6b6f 100644 --- a/test/test_trainer.py +++ b/test/test_trainer.py @@ -89,11 +89,10 @@ class MockingLossModule(nn.Module): def mocking_trainer(file=None, optimizer=_mocking_optim) -> Trainer: trainer = Trainer( - MockingCollector(), - *[ - None, - ] - * 2, + collector=MockingCollector(), + total_frames=None, + frame_skip=None, + optim_steps_per_batch=None, loss_module=MockingLossModule(), optimizer=optimizer, save_trainer_file=file, From 03007282a351f4c825af1cf8d0a783b1c5e9dca4 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 08:55:36 +0100 Subject: [PATCH 35/89] no grad --- tutorials/sphinx-tutorials/coding_ppo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_ppo.py b/tutorials/sphinx-tutorials/coding_ppo.py index 77ed207837f..274269a3dac 100644 --- a/tutorials/sphinx-tutorials/coding_ppo.py +++ b/tutorials/sphinx-tutorials/coding_ppo.py @@ -602,7 +602,8 @@ # We'll need an "advantage" signal to make PPO work. # We re-compute it at each epoch as its value depends on the value # network which is updated in the inner loop. - advantage_module(tensordict_data) + with torch.no_grad(): + advantage_module(tensordict_data) data_view = tensordict_data.reshape(-1) replay_buffer.extend(data_view.cpu()) for _ in range(frames_per_batch // sub_batch_size): From 86915fe373a7e247a67d6af3a6fb870c0756e928 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 11:01:43 +0100 Subject: [PATCH 36/89] init --- torchrl/data/replay_buffers/replay_buffers.py | 426 +++++++++++++++--- torchrl/data/replay_buffers/samplers.py | 4 + 2 files changed, 374 insertions(+), 56 deletions(-) diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py index 0a20dc6dff7..1e824817b92 100644 --- a/torchrl/data/replay_buffers/replay_buffers.py +++ b/torchrl/data/replay_buffers/replay_buffers.py @@ -82,32 +82,80 @@ def decorated_fun(self, *args, **kwargs): class ReplayBuffer: """A generic, composable replay buffer class. + All arguments are keyword-only arguments. + Args: storage (Storage, optional): the storage to be used. If none is provided - a default ListStorage with max_size of 1_000 will be created. - sampler (Sampler, optional): the sampler to be used. If none is provided - a default RandomSampler() will be used. + a default :class:`torchrl.data.replay_buffers.ListStorage` with + ``max_size`` of ``1_000`` will be created. + sampler (Sampler, optional): the sampler to be used. 
If none is provided, + a default :class:`torchrl.data.replay_buffers.RandomSampler` + will be used. writer (Writer, optional): the writer to be used. If none is provided - a default RoundRobinWriter() will be used. + a default :class:`torchrl.data.replay_buffers.RoundRobinWriter` + will be used. collate_fn (callable, optional): merges a list of samples to form a mini-batch of Tensor(s)/outputs. Used when using batched - loading from a map-style dataset. + loading from a map-style dataset. The default value will be decided + based on the storage type. pin_memory (bool): whether pin_memory() should be called on the rb samples. prefetch (int, optional): number of next batches to be prefetched - using multithreading. - transform (Transform, optional): Transform to be executed when sample() is called. - To chain transforms use the :obj:`Compose` class. + using multithreading. Defaults to None (no prefetching). + transform (Transform, optional): Transform to be executed when + sample() is called. + To chain transforms use the :class:`torchrl.envs.Compose` class. Transforms should be used with :class:`tensordict.TensorDict` content. If used with other structures, the transforms should be - encoded with a `"data"` leading key that will be used to + encoded with a ``"data"`` leading key that will be used to construct a tensordict from the non-tensordict content. - batch_size (int, optional): the batch size to be used when sample() is called. + batch_size (int, optional): the batch size to be used when sample() is + called. + .. note:: + The batch-size can be specified at construction time via the + ``batch_size`` argument, or at sampling time. The former should + be preferred whenever the batch-size is consistent across the + experiment. If the batch-size is likely to change, it can be + passed to the :meth:`~.sample` method. This option is + incompatible with prefetching (since this requires to know the + batch-size in advance) as well as with samplers that have a + ``drop_last`` argument. + Examples: + >>> import torch + >>> + >>> from torchrl.data import ReplayBuffer, ListStorage + >>> + >>> torch.manual_seed(0) + >>> rb = ReplayBuffer( + ... storage=ListStorage(max_size=1000), + ... batch_size=5, + ... ) + >>> # populate the replay buffer + >>> data = range(10) + >>> rb.extend(data) + >>> # sample will return as many elements as specified in the constructor + >>> sample = rb.sample() + >>> print(sample) + tensor([4, 9, 3, 0, 3]) + >>> # Passing the batch-size to the sample method overrides the one in the constructor + >>> sample = rb.sample(batch_size=3) + >>> print(sample) + tensor([9, 7, 3]) + >>> # one cans sample using the ``sample`` method or iterate over the buffer + >>> for i, batch in enumerate(rb): + ... print(i, batch) + ... if i == 3: + ... break + 0 tensor([7, 3, 1, 6, 6]) + 1 tensor([9, 8, 6, 6, 8]) + 2 tensor([4, 3, 6, 9, 1]) + 3 tensor([4, 4, 1, 9, 9]) """ def __init__( self, + *, storage: Optional[Storage] = None, sampler: Optional[Sampler] = None, writer: Optional[Writer] = None, @@ -147,10 +195,21 @@ def __init__( transform.eval() self._transform = transform - if batch_size is None: - warnings.warn( - "Constructing replay buffer without specifying behaviour is no longer " - "recommended, and will be deprecated in the future." + if batch_size is None and prefetch: + raise ValueError( + "Dynamic batch-size specification is incompatible " + "with multithreaded sampling. " + "When using prefetch, the batch-size must be specified in " + "advance. 
" + ) + if ( + batch_size is None + and hasattr(self._sampler, "drop_last") + and self._sampler.drop_last + ): + raise ValueError( + "Samplers with drop_last=True must work with a predictible batch-size. " + "Please pass the batch-size to the ReplayBuffer constructor." ) self._batch_size = batch_size @@ -247,6 +306,7 @@ def update_priority( def _sample(self, batch_size: int) -> Tuple[Any, dict]: with self._replay_lock: index, info = self._sampler.sample(self._storage, batch_size) + info["index"] = index data = self._storage[index] if not isinstance(index, INT_CLASSES): data = self._collate_fn(data) @@ -279,17 +339,26 @@ def sample( A batch of data selected in the replay buffer. A tuple containing this batch and info if return_info flag is set to True. """ - if batch_size is not None: + if ( + batch_size is not None + and self._batch_size is not None + and batch_size != self._batch_size + ): warnings.warn( - "batch_size argument in sample has been deprecated. Set the batch_size " - "when constructing the replay buffer instead." + f"Got conflicting batch_sizes in constructor ({self._batch_size}) " + f"and `sample` ({batch_size}). Refer to the ReplayBuffer documentation " + "for a proper usage of the batch-size arguments. " + "The batch-size provided to the sample method " + "will prevail." ) - elif self._batch_size is not None: + elif batch_size is None and self._batch_size is not None: batch_size = self._batch_size - else: + elif batch_size is None: raise RuntimeError( "batch_size not specified. You can specify the batch_size when " - "constructing the replay buffer" + "constructing the replay buffer, or pass it to the sample method. " + "Refer to the ReplayBuffer documentation " + "for a proper usage of the batch-size arguments." ) if not self._prefetch: ret = self._sample(batch_size) @@ -336,9 +405,12 @@ def insert_transform(self, index: int, transform: "Transform") -> None: # noqa- self._transform.insert(index, transform) def __iter__(self): + if self._sampler.ran_out: + self._sampler.ran_out = False if self._batch_size is None: raise RuntimeError( - "batch_size was not specified during construction of the replay buffer" + "Cannot iterate over the replay buffer. " + "Batch_size was not specified during construction of the replay buffer." ) while not self._sampler.ran_out: data = self.sample() @@ -348,6 +420,8 @@ def __iter__(self): class PrioritizedReplayBuffer(ReplayBuffer): """Prioritized replay buffer. + All arguments are keyword-only arguments. + Presented in "Schaul, T.; Quan, J.; Antonoglou, I.; and Silver, D. 2015. Prioritized experience replay." @@ -359,22 +433,75 @@ class PrioritizedReplayBuffer(ReplayBuffer): beta (float): importance sampling negative exponent. eps (float): delta added to the priorities to ensure that the buffer does not contain null priorities. - dtype (torch.dtype): type of the data. Can be torch.float or torch.double. storage (Storage, optional): the storage to be used. If none is provided - a default ListStorage with max_size of 1_000 will be created. + a default :class:`torchrl.data.replay_buffers.ListStorage` with + ``max_size`` of ``1_000`` will be created. collate_fn (callable, optional): merges a list of samples to form a mini-batch of Tensor(s)/outputs. Used when using batched - loading from a map-style dataset. + loading from a map-style dataset. The default value will be decided + based on the storage type. pin_memory (bool): whether pin_memory() should be called on the rb samples. 
prefetch (int, optional): number of next batches to be prefetched - using multithreading. - transform (Transform, optional): Transform to be executed when sample() is called. - To chain transforms use the :obj:`Compose` class. + using multithreading. Defaults to None (no prefetching). + transform (Transform, optional): Transform to be executed when + sample() is called. + To chain transforms use the :class:`torchrl.envs.Compose` class. + Transforms should be used with :class:`tensordict.TensorDict` + content. If used with other structures, the transforms should be + encoded with a ``"data"`` leading key that will be used to + construct a tensordict from the non-tensordict content. + batch_size (int, optional): the batch size to be used when sample() is + called. + .. note:: + The batch-size can be specified at construction time via the + ``batch_size`` argument, or at sampling time. The former should + be preferred whenever the batch-size is consistent across the + experiment. If the batch-size is likely to change, it can be + passed to the :meth:`~.sample` method. This option is + incompatible with prefetching (since this requires to know the + batch-size in advance) as well as with samplers that have a + ``drop_last`` argument. + + .. note:: + Generic prioritized replay buffers (ie. non-tensordict backed) require + calling :meth:`~.sample` with the ``return_info`` argument set to + ``True`` to have access to the indices, and hence update the priority. + Using :class:`tensordict.TensorDict` and the related + :class:`torchrl.data.TensorDictPrioritizedReplayBuffer` simplifies this + process. + + Examples: + >>> import torch + >>> + >>> from torchrl.data import ListStorage, PrioritizedReplayBuffer + >>> + >>> torch.manual_seed(0) + >>> + >>> rb = PrioritizedReplayBuffer(alpha=0.7, beta=0.9, storage=ListStorage(10)) + >>> data = range(10) + >>> rb.extend(data) + >>> sample = rb.sample(3) + >>> print(sample) + tensor([1, 0, 1]) + >>> # get the info to find what the indices are + >>> sample, info = rb.sample(5, return_info=True) + >>> print(sample, info) + tensor([2, 7, 4, 3, 5]) {'_weight': array([1., 1., 1., 1., 1.], dtype=float32), 'index': array([2, 7, 4, 3, 5])} + >>> # update priority + >>> priority = torch.ones(5) * 5 + >>> rb.update_priority(info["index"], priority) + >>> # and now a new sample, the weights should be updated + >>> sample, info = rb.sample(5, return_info=True) + >>> print(sample, info) + tensor([2, 5, 2, 2, 5]) {'_weight': array([0.36278465, 0.36278465, 0.36278465, 0.36278465, 0.36278465], + dtype=float32), 'index': array([2, 5, 2, 2, 5])} + """ def __init__( self, + *, alpha: float, beta: float, eps: float = 1e-8, @@ -401,15 +528,114 @@ def __init__( class TensorDictReplayBuffer(ReplayBuffer): - """TensorDict-specific wrapper around the ReplayBuffer class. + """TensorDict-specific wrapper around the :class:`torchrl.data.ReplayBuffer` class. + + All arguments are keyword-only arguments. Args: - priority_key (str): the key at which priority is assumed to be stored - within TensorDicts added to this ReplayBuffer. + storage (Storage, optional): the storage to be used. If none is provided + a default :class:`torchrl.data.replay_buffers.ListStorage` with + ``max_size`` of ``1_000`` will be created. + sampler (Sampler, optional): the sampler to be used. If none is provided + a default RandomSampler() will be used. + writer (Writer, optional): the writer to be used. If none is provided + a default :class:`torchrl.data.replay_buffers.RoundRobinWriter` + will be used. 
+ collate_fn (callable, optional): merges a list of samples to form a + mini-batch of Tensor(s)/outputs. Used when using batched + loading from a map-style dataset. The default value will be decided + based on the storage type. + pin_memory (bool): whether pin_memory() should be called on the rb + samples. + prefetch (int, optional): number of next batches to be prefetched + using multithreading. Defaults to None (no prefetching). + transform (Transform, optional): Transform to be executed when + sample() is called. + To chain transforms use the :class:`torchrl.envs.Compose` class. + Transforms should be used with :class:`tensordict.TensorDict` + content. If used with other structures, the transforms should be + encoded with a ``"data"`` leading key that will be used to + construct a tensordict from the non-tensordict content. + batch_size (int, optional): the batch size to be used when sample() is + called. + .. note:: + The batch-size can be specified at construction time via the + ``batch_size`` argument, or at sampling time. The former should + be preferred whenever the batch-size is consistent across the + experiment. If the batch-size is likely to change, it can be + passed to the :meth:`~.sample` method. This option is + incompatible with prefetching (since this requires to know the + batch-size in advance) as well as with samplers that have a + ``drop_last`` argument. + priority_key (str, optional): the key at which priority is assumed to + be stored within TensorDicts added to this ReplayBuffer. + This is to be used when the sampler is of type + :class:`torchrl.data.PrioritizedSampler`. + Defaults to ``"td_error"``. + + Examples: + >>> import torch + >>> + >>> from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer + >>> from tensordict import TensorDict + >>> + >>> torch.manual_seed(0) + >>> + >>> rb = TensorDictReplayBuffer(storage=LazyTensorStorage(10), batch_size=5) + >>> data = TensorDict({"a": torch.ones(10, 3), ("b", "c"): torch.zeros(10, 1, 1)}, [10]) + >>> rb.extend(data) + >>> sample = rb.sample(3) + >>> # samples keep track of the index + >>> print(sample) + TensorDict( + fields={ + a: Tensor(shape=torch.Size([3, 3]), device=cpu, dtype=torch.float32, is_shared=False), + b: TensorDict( + fields={ + c: Tensor(shape=torch.Size([3, 1, 1]), device=cpu, dtype=torch.float32, is_shared=False)}, + batch_size=torch.Size([3]), + device=cpu, + is_shared=False), + index: Tensor(shape=torch.Size([3]), device=cpu, dtype=torch.int32, is_shared=False)}, + batch_size=torch.Size([3]), + device=cpu, + is_shared=False) + >>> # we can iterate over the buffer + >>> for i, data in enumerate(rb): + ... print(i, data) + ... if i == 2: + ... 
break + 0 TensorDict( + fields={ + a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False), + b: TensorDict( + fields={ + c: Tensor(shape=torch.Size([5, 1, 1]), device=cpu, dtype=torch.float32, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False), + index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int32, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False) + 1 TensorDict( + fields={ + a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False), + b: TensorDict( + fields={ + c: Tensor(shape=torch.Size([5, 1, 1]), device=cpu, dtype=torch.float32, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False), + index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int32, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False) + """ - def __init__(self, *args, priority_key: str = "td_error", **kw) -> None: - super().__init__(*args, **kw) + def __init__(self, *, priority_key: str = "td_error", **kw) -> None: + super().__init__(**kw) self.priority_key = priority_key def _get_priority(self, tensordict: TensorDictBase) -> Optional[torch.Tensor]: @@ -498,8 +724,8 @@ def update_tensordict_priority(self, data: TensorDictBase) -> None: def sample( self, batch_size: Optional[int] = None, - include_info: bool = False, return_info: bool = False, + include_info: bool = None, ) -> TensorDictBase: """Samples a batch of data from the replay buffer. @@ -509,7 +735,6 @@ def sample( batch_size (int, optional): size of data to be collected. If none is provided, this method will sample a batch-size as indicated by the sampler. - include_info (bool): whether to add info to the returned tensordict. return_info (bool): whether to return info. If True, the result is a tuple (data, info). If False, the result is the data. @@ -517,10 +742,18 @@ def sample( A tensordict containing a batch of data selected in the replay buffer. A tuple containing this tensordict and info if return_info flag is set to True. """ + if include_info is not None: + warnings.warn( + "include_info is going to be deprecated soon." + "The default behaviour has changed to `include_info=True` " + "to avoid bugs linked to wrongly preassigned values in the " + "output tensordict." + ) + data, info = super().sample(batch_size, return_info=True) - if include_info: + if include_info in (True, None): for k, v in info.items(): - data.set(k, torch.tensor(v, device=data.device), inplace=True) + data.set(k, torch.tensor(v, device=data.device)) if "_batch_size" in data.keys(): # we need to reset the batch-size shape = data.pop("_batch_size") @@ -539,40 +772,119 @@ def sample( class TensorDictPrioritizedReplayBuffer(TensorDictReplayBuffer): - """TensorDict-specific wrapper around the PrioritizedReplayBuffer class. + """TensorDict-specific wrapper around the :class:`torchrl.data.PrioritizedReplayBuffer` class. - This class returns tensordicts with a new key "index" that represents + All arguments are keyword-only arguments. + + This class returns tensordicts with a new key ``"index"`` that represents the index of each element in the replay buffer. It also provides the - 'update_tensordict_priority' method that only requires for the + :meth:`~.update_tensordict_priority` method that only requires for the tensordict to be passed to it with its new priority value. Args: - alpha (float): exponent α determines how much prioritization is - used, with α = 0 corresponding to the uniform case. 
+ alpha (float): exponent α determines how much prioritization is used, + with α = 0 corresponding to the uniform case. beta (float): importance sampling negative exponent. - priority_key (str, optional): key where the priority value can be - found in the stored tensordicts. Default is :obj:`"td_error"` - eps (float, optional): delta added to the priorities to ensure that the - buffer does not contain null priorities. - dtype (torch.dtype): type of the data. Can be torch.float or torch.double. + eps (float): delta added to the priorities to ensure that the buffer + does not contain null priorities. storage (Storage, optional): the storage to be used. If none is provided - a default ListStorage with max_size of 1_000 will be created. + a default :class:`torchrl.data.replay_buffers.ListStorage` with + ``max_size`` of ``1_000`` will be created. collate_fn (callable, optional): merges a list of samples to form a - mini-batch of Tensor(s)/outputs. Used when using batched loading - from a map-style dataset. - pin_memory (bool, optional): whether pin_memory() should be called on - the rb samples. Default is ``False``. + mini-batch of Tensor(s)/outputs. Used when using batched + loading from a map-style dataset. The default value will be decided + based on the storage type. + pin_memory (bool): whether pin_memory() should be called on the rb + samples. prefetch (int, optional): number of next batches to be prefetched - using multithreading. - transform (Transform, optional): Transform to be executed when sample() is called. - To chain transforms use the :obj:`Compose` class. + using multithreading. Defaults to None (no prefetching). + transform (Transform, optional): Transform to be executed when + sample() is called. + To chain transforms use the :class:`torchrl.envs.Compose` class. + Transforms should be used with :class:`tensordict.TensorDict` + content. If used with other structures, the transforms should be + encoded with a ``"data"`` leading key that will be used to + construct a tensordict from the non-tensordict content. + batch_size (int, optional): the batch size to be used when sample() is + called. + .. note:: + The batch-size can be specified at construction time via the + ``batch_size`` argument, or at sampling time. The former should + be preferred whenever the batch-size is consistent across the + experiment. If the batch-size is likely to change, it can be + passed to the :meth:`~.sample` method. This option is + incompatible with prefetching (since this requires to know the + batch-size in advance) as well as with samplers that have a + ``drop_last`` argument. + priority_key (str, optional): the key at which priority is assumed to + be stored within TensorDicts added to this ReplayBuffer. + This is to be used when the sampler is of type + :class:`torchrl.data.PrioritizedSampler`. + Defaults to ``"td_error"``. reduction (str, optional): the reduction method for multidimensional tensordicts (ie stored trajectories). Can be one of "max", "min", "median" or "mean". 
+ + Examples: + >>> import torch + >>> + >>> from torchrl.data import LazyTensorStorage, TensorDictPrioritizedReplayBuffer + >>> from tensordict import TensorDict + >>> + >>> torch.manual_seed(0) + >>> + >>> rb = TensorDictPrioritizedReplayBuffer(alpha=0.7, beta=1.1, storage=LazyTensorStorage(10), batch_size=5) + >>> data = TensorDict({"a": torch.ones(10, 3), ("b", "c"): torch.zeros(10, 3, 1)}, [10]) + >>> rb.extend(data) + >>> print("len of rb", len(rb)) + len of rb 10 + >>> sample = rb.sample(5) + >>> print(sample) + TensorDict( + fields={ + _weight: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.float32, is_shared=False), + a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False), + b: TensorDict( + fields={ + c: Tensor(shape=torch.Size([5, 3, 1]), device=cpu, dtype=torch.float32, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False), + index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int64, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False) + >>> print("index", sample["index"]) + index tensor([9, 5, 2, 2, 7]) + >>> # give a high priority to these samples... + >>> sample.set("td_error", 100*torch.ones(sample.shape)) + >>> # and update priority + >>> rb.update_tensordict_priority(sample) + >>> # the new sample should have a high overlap with the previous one + >>> sample = rb.sample(5) + >>> print(sample) + TensorDict( + fields={ + _weight: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.float32, is_shared=False), + a: Tensor(shape=torch.Size([5, 3]), device=cpu, dtype=torch.float32, is_shared=False), + b: TensorDict( + fields={ + c: Tensor(shape=torch.Size([5, 3, 1]), device=cpu, dtype=torch.float32, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False), + index: Tensor(shape=torch.Size([5]), device=cpu, dtype=torch.int64, is_shared=False)}, + batch_size=torch.Size([5]), + device=cpu, + is_shared=False) + >>> print("index", sample["index"]) + index tensor([2, 5, 5, 9, 7]) + """ def __init__( self, + *, alpha: float, beta: float, priority_key: str = "td_error", @@ -612,10 +924,12 @@ def __init__(self, *args, **kwargs): def sample( self, batch_size: Optional[int] = None, - include_info: bool = False, + include_info: bool = None, return_info: bool = False, ) -> TensorDictBase: - return super().sample(batch_size, include_info, return_info) + return super().sample( + batch_size=batch_size, include_info=include_info, return_info=return_info + ) def add(self, data: TensorDictBase) -> int: return super().add(data) diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py index 9fd0fab8af4..564b1197c2c 100644 --- a/torchrl/data/replay_buffers/samplers.py +++ b/torchrl/data/replay_buffers/samplers.py @@ -137,6 +137,10 @@ def sample(self, storage: Storage, batch_size: int) -> Tuple[Any, dict]: def ran_out(self): return self._ran_out + @ran_out.setter + def ran_out(self, value): + self._ran_out = value + class PrioritizedSampler(Sampler): """Prioritized sampler for replay buffer. 
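The batch-size handling introduced in this patch can be summarised with a small, self-contained sketch (class names and import paths follow the modules touched above; treat it as illustrative rather than as part of the patch itself):

    import torch

    from torchrl.data import ListStorage, ReplayBuffer
    from torchrl.data.replay_buffers.samplers import SamplerWithoutReplacement

    # Fixing the batch-size at construction time lets sample() be called
    # without arguments and makes the buffer directly iterable.
    rb = ReplayBuffer(
        storage=ListStorage(10),
        sampler=SamplerWithoutReplacement(drop_last=False),
        batch_size=5,
    )
    rb.extend(torch.zeros(10))

    for batch in rb:  # yields 2 batches of 5 elements, until the sampler runs out
        assert (batch == 0).all()
    batch = rb.sample()  # the constructor batch-size is reused here

The tests added in the next commit (``test_multi_loops`` and ``test_batch_errors``) exercise exactly this pattern, including the error raised when a sampler with ``drop_last=True`` is combined with an unspecified batch-size.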
From d23af8b1760a0879af6592922db350dc23771d7d Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 11:06:51 +0100 Subject: [PATCH 37/89] tests --- test/test_rb.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/test/test_rb.py b/test/test_rb.py index 6c829ca5668..8d75d287236 100644 --- a/test/test_rb.py +++ b/test/test_rb.py @@ -580,6 +580,54 @@ def test_index(self, rbtype, storage, size, prefetch): assert b +def test_multi_loops(): + """Tests that one can iterate multiple times over a buffer without rep.""" + rb = ReplayBuffer( + batch_size=5, storage=ListStorage(10), sampler=SamplerWithoutReplacement() + ) + rb.extend(torch.zeros(10)) + for i, d in enumerate(rb): # noqa: B007 + assert (d == 0).all() + assert i == 1 + for i, d in enumerate(rb): # noqa: B007 + assert (d == 0).all() + assert i == 1 + + +def test_batch_errors(): + """Tests error messages related to batch-size""" + rb = ReplayBuffer( + storage=ListStorage(10), sampler=SamplerWithoutReplacement(drop_last=False) + ) + rb.extend(torch.zeros(10)) + rb.sample(3) # that works + with pytest.raises( + RuntimeError, + match="Cannot iterate over the replay buffer. Batch_size was not specified", + ): + for _ in rb: + pass + with pytest.raises(RuntimeError, match="batch_size not specified"): + rb.sample() + with pytest.raises(ValueError, match="Samplers with drop_last=True"): + ReplayBuffer( + storage=ListStorage(10), sampler=SamplerWithoutReplacement(drop_last=True) + ) + # that works + ReplayBuffer( + storage=ListStorage(10), + ) + rb = ReplayBuffer( + storage=ListStorage(10), + sampler=SamplerWithoutReplacement(drop_last=False), + batch_size=3, + ) + rb.extend(torch.zeros(10)) + for _ in rb: + pass + rb.sample() + + @pytest.mark.parametrize("priority_key", ["pk", "td_error"]) @pytest.mark.parametrize("contiguous", [True, False]) @pytest.mark.parametrize("device", get_available_devices()) From 68c34425b1c9fb8658c440b91c82a4245b49a5a3 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 12:04:00 +0100 Subject: [PATCH 38/89] empty commit From dec5c56ba3a7b6742fd233e8f8d1ac16655ca54c Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 13:16:20 +0100 Subject: [PATCH 39/89] tests --- test/test_trainer.py | 18 +++++++------ test/test_transforms.py | 56 ++++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/test/test_trainer.py b/test/test_trainer.py index 1251d4edd48..533fd4f0b0d 100644 --- a/test/test_trainer.py +++ b/test/test_trainer.py @@ -203,7 +203,9 @@ def test_rb_trainer(self, prioritized): S = 100 storage = ListStorage(S) if prioritized: - replay_buffer = TensorDictPrioritizedReplayBuffer(1.1, 0.9, storage=storage) + replay_buffer = TensorDictPrioritizedReplayBuffer( + alpha=1.1, beta=0.9, storage=storage + ) else: replay_buffer = TensorDictReplayBuffer(storage=storage) @@ -260,8 +262,8 @@ def test_rb_trainer_state_dict(self, prioritized, storage_type): if prioritized: replay_buffer = TensorDictPrioritizedReplayBuffer( - 1.1, - 0.9, + alpha=1.1, + beta=0.9, storage=storage, ) else: @@ -293,7 +295,7 @@ def test_rb_trainer_state_dict(self, prioritized, storage_type): trainer2 = mocking_trainer() if prioritized: replay_buffer2 = TensorDictPrioritizedReplayBuffer( - 1.1, 0.9, storage=storage + alpha=1.1, beta=0.9, storage=storage ) else: replay_buffer2 = TensorDictReplayBuffer(storage=storage) @@ -398,8 +400,8 @@ def make_storage(): storage = make_storage() if prioritized: replay_buffer = 
TensorDictPrioritizedReplayBuffer( - 1.1, - 0.9, + alpha=1.1, + beta=0.9, storage=storage, ) else: @@ -430,8 +432,8 @@ def make_storage(): storage2 = make_storage() if prioritized: replay_buffer2 = TensorDictPrioritizedReplayBuffer( - 1.1, - 0.9, + alpha=1.1, + beta=0.9, storage=storage2, ) else: diff --git a/test/test_transforms.py b/test/test_transforms.py index 0b7d9391e6a..b28cb9a758a 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -281,7 +281,7 @@ def test_transform_rb(self): batch = [20] torch.manual_seed(0) br = BinarizeReward() - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(br) reward = torch.randn(*batch, 1, device=device) misc = torch.randn(*batch, 1, device=device) @@ -419,7 +419,7 @@ def test_transform_rb(self): key_tensors = [key1_tensor, key2_tensor] td = TensorDict(dict(zip(keys, key_tensors)), batch_size, device=device) cat_frames = CatFrames(N=N, in_keys=keys, dim=dim) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(cat_frames) rb.extend(td) with pytest.raises( @@ -651,7 +651,7 @@ def test_transform_rb(self, model, device): out_keys=out_keys, tensor_pixels_keys=tensor_pixels_key, ) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(r3m) td = TensorDict({"pixels": torch.randint(255, (10, 244, 244, 3))}, [10]) rb.extend(td) @@ -1027,7 +1027,7 @@ def test_transform_env(self): def test_transform_rb(self): transform = StepCounter(10) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) td = TensorDict({"a": torch.randn(10)}, [10]) rb.extend(td) rb.append_transform(transform) @@ -1345,7 +1345,7 @@ def test_transform_rb(self): dim=-1, del_keys=True, ) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(ct) td = ( TensorDict( @@ -1525,7 +1525,7 @@ def test_transform_rb( batch, ) td.set("dont touch", dont_touch.clone()) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(cc) rb.extend(td) td = rb.sample(10) @@ -1668,7 +1668,7 @@ def test_transform_model(self, include_forward): @pytest.mark.parametrize("include_forward", [True, False]) def test_transform_rb(self, include_forward): - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) t = DiscreteActionProjection(7, 10, include_forward=include_forward) rb.append_transform(t) td = TensorDict( @@ -1863,7 +1863,7 @@ def test_transform_model(self, dtype_fixture): # noqa: F811 def test_transform_rb( self, ): - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) t = DoubleToFloat(in_keys=["observation"], in_keys_inv=["action"]) rb.append_transform(t) td = TensorDict( @@ -2029,7 +2029,7 @@ def test_transform_model(self): def test_transform_rb(self): t = ExcludeTransform("a") - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict( { @@ -2193,7 +2193,7 @@ def test_transform_model(self): def test_transform_rb(self): t = SelectTransform("b", "c") - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict( { @@ -2377,7 +2377,7 @@ def test_transform_model(self, out_keys): def test_transform_rb(self, out_keys): t = FlattenObservation(-3, -1, 
out_keys=out_keys) td = TensorDict({"pixels": torch.randint(255, (10, 10, 3))}, []).expand(10) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) rb.extend(td) td = rb.sample(2) @@ -2480,7 +2480,7 @@ def test_transform_model(self): def test_transform_rb(self): t = FrameSkipTransform(2) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) tensordict = TensorDict({"a": torch.zeros(10)}, [10]) rb.extend(tensordict) @@ -2678,7 +2678,7 @@ def test_transform_model(self, out_keys): @pytest.mark.parametrize("out_keys", [None, ["stuff"]]) def test_transform_rb(self, out_keys): td = TensorDict({"pixels": torch.rand(3, 12, 12)}, []).expand(3) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(GrayScale(out_keys=out_keys)) rb.extend(td) r = rb.sample(3) @@ -2751,7 +2751,7 @@ def test_transform_model(self): def test_transform_rb(self): t = NoopResetEnv() - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict({}, [10]) rb.extend(td) @@ -3025,7 +3025,7 @@ def test_transform_rb(self): standard_normal=standard_normal, ) ) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) obs = torch.randn(7) @@ -3449,7 +3449,7 @@ def test_transform_model(self): def test_transform_rb(self): t = Resize(20, 21, in_keys=["pixels"]) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict({"pixels": torch.randn(3, 32, 32)}, []).expand(10) rb.extend(td) @@ -3527,7 +3527,7 @@ def test_transform_model(self): def test_transform_rb(self): t = RewardClipping(-0.1, 0.1) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) td = TensorDict({"reward": torch.randn(10)}, []).expand(10) rb.append_transform(t) rb.extend(td) @@ -3677,7 +3677,7 @@ def test_transform_rb(self, standard_normal): loc = 0.5 scale = 1.5 t = RewardScaling(0.5, 1.5, standard_normal=standard_normal) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) reward = torch.randn(10) td = TensorDict({"reward": reward}, []).expand(10) rb.append_transform(t) @@ -3768,7 +3768,7 @@ def test_transform_rb( self, ): t = RewardSum() - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) reward = torch.randn(10) td = TensorDict({("next", "reward"): reward}, []).expand(10) rb.append_transform(t) @@ -4102,7 +4102,7 @@ def test_transform_rb(self, out_keys, unsqueeze_dim): out_keys=out_keys, allow_positive_dim=True, ) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict( {"observation": TensorDict({"stuff": torch.randn(3, 4)}, [3, 4])}, [] @@ -4349,7 +4349,7 @@ def test_transform_rb(self, out_keys): out_keys=out_keys, allow_positive_dim=True, ) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict( {"observation": TensorDict({"stuff": torch.randn(3, 1, 4)}, [3, 1, 4])}, [] @@ -4544,7 +4544,7 @@ def test_transform_model(self, out_keys): @pytest.mark.parametrize("out_keys", [None, ["stuff"]]) def test_transform_rb(self, out_keys): t = ToTensorImage(in_keys=["pixels"], out_keys=out_keys) - rb = 
ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict({"pixels": torch.randint(255, (21, 22, 3))}, []) rb.extend(td.expand(10)) @@ -4587,7 +4587,7 @@ def test_transform_model(self): def test_transform_rb(self): batch_size = (2,) t = TensorDictPrimer(mykey=UnboundedContinuousTensorSpec([*batch_size, 3])) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict({"a": torch.zeros(())}, []) rb.extend(td.expand(10)) @@ -4882,7 +4882,7 @@ def test_transform_rb(self): in_keys=["observation"], T=3, ) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(t) rb.extend(td) with pytest.raises( @@ -5010,7 +5010,7 @@ def test_transform_rb(self): action_dim = 5 batch_size = (2,) t = gSDENoise(state_dim=state_dim, action_dim=action_dim, shape=batch_size) - rb = ReplayBuffer(LazyTensorStorage(10)) + rb = ReplayBuffer(storage=LazyTensorStorage(10)) rb.append_transform(t) td = TensorDict({"a": torch.zeros(())}, []) rb.extend(td.expand(10)) @@ -5158,7 +5158,7 @@ def test_transform_rb(self, model, device): out_keys=out_keys, tensor_pixels_keys=tensor_pixels_key, ) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(vip) td = TensorDict({"pixels": torch.randint(255, (10, 244, 244, 3))}, [10]) rb.extend(td) @@ -6583,7 +6583,7 @@ def test_transform_rb(self, create_copy, inverse): else: t = RenameTransform(["a"], ["b"], ["a"], ["b"], create_copy=create_copy) tensordict = TensorDict({"b": torch.randn(())}, []).expand(10) - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(t) rb.extend(tensordict) assert "a" in rb._storage._storage.keys() @@ -6679,7 +6679,7 @@ def test_transform_model(self): def test_transform_rb(self): batch = [1] device = "cpu" - rb = ReplayBuffer(LazyTensorStorage(20)) + rb = ReplayBuffer(storage=LazyTensorStorage(20)) rb.append_transform(InitTracker()) reward = torch.randn(*batch, 1, device=device) misc = torch.randn(*batch, 1, device=device) From 76120983cde50ca84e3d52071c81f184e83ed518 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 13:18:29 +0100 Subject: [PATCH 40/89] tests --- test/test_rb_distributed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_rb_distributed.py b/test/test_rb_distributed.py index 252913500f3..7443601c76d 100644 --- a/test/test_rb_distributed.py +++ b/test/test_rb_distributed.py @@ -53,7 +53,7 @@ def sample_from_buffer_remotely_returns_correct_tensordict_test(rank, name, worl _, inserted = _add_random_tensor_dict_to_buffer(buffer) sampled = _sample_from_buffer(buffer, 1) assert type(sampled) is type(inserted) is TensorDict - assert (sampled == inserted)["a"].item() + assert (sampled["a"] == inserted["a"]).all() @pytest.mark.skipif( From 69eb921397db5dcac91c2f99b696ca59ed0ff8a7 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 13:25:32 +0100 Subject: [PATCH 41/89] amend --- tutorials/sphinx-tutorials/coding_ddpg.py | 2 ++ tutorials/sphinx-tutorials/coding_dqn.py | 30 +++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 0d1c353d472..53fd830f3be 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -394,6 +394,8 @@ def 
_forward(self, input_tensordict: TensorDictBase) -> TensorDict: class DDPGLoss(LossModule): default_value_estimator = default_value_estimator + make_value_estimator = make_value_estimator + __init__ = _init forward = _forward loss_value = _loss_value diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 9313e6e8b08..a0943107486 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -84,6 +84,8 @@ # of this algorithm. # sphinx_gallery_start_ignore +import os +import uuid import warnings from torchrl.objectives import DQNLoss, SoftUpdate @@ -550,7 +552,8 @@ def get_loss_module(actor, gamma): optimizer = torch.optim.Adam( loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas ) -logger = CSVLogger(exp_name="dqn_exp", log_dir="./") +exp_name = f"dqn_exp_{uuid.uuid1()}" +logger = CSVLogger(exp_name=exp_name, log_dir="./") trainer = Trainer( collector=collector, @@ -613,7 +616,30 @@ def get_loss_module(actor, gamma): ############################################################################### # We can now quickly check the CSVs with the results. -# TODO +def print_csv_files_in_folder(folder_path): + """ + Find all CSV files in a folder and print the first 10 lines of each file. + + Args: + folder_path (str): The relative path to the folder. + + Returns: + list: A list of all CSV files in the folder. + """ + csv_files = [] + for file in os.listdir(folder_path): + if file.endswith(".csv"): + csv_files.append(os.path.join(folder_path, file)) + for csv_file in csv_files: + print(f"File: {csv_file}") + with open(csv_file, "r") as f: + for i, line in enumerate(f): + if i == 10: + break + print(line.strip()) + print("\n") + +print_csv_files_in_folder(exp_name) ############################################################################### # Conclusion and possible improvements From 5d32e102d2461e772f07b34e5db7546fc5ca6cdb Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 13:53:38 +0100 Subject: [PATCH 42/89] amend --- torchrl/data/replay_buffers/replay_buffers.py | 2 ++ tutorials/sphinx-tutorials/coding_dqn.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py index 0a20dc6dff7..a8044c063ec 100644 --- a/torchrl/data/replay_buffers/replay_buffers.py +++ b/torchrl/data/replay_buffers/replay_buffers.py @@ -482,6 +482,8 @@ def extend(self, tensordicts: Union[List, TensorDictBase]) -> torch.Tensor: return index def update_tensordict_priority(self, data: TensorDictBase) -> None: + if not isinstance(self._sampler, PrioritizedSampler): + return priority = torch.tensor( [self._get_priority(td) for td in data], dtype=torch.float, diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index a0943107486..21fda673cd2 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -616,6 +616,7 @@ def get_loss_module(actor, gamma): ############################################################################### # We can now quickly check the CSVs with the results. + def print_csv_files_in_folder(folder_path): """ Find all CSV files in a folder and print the first 10 lines of each file. 
@@ -639,6 +640,7 @@ def print_csv_files_in_folder(folder_path): print(line.strip()) print("\n") + print_csv_files_in_folder(exp_name) ############################################################################### From c471b96a31d2ade23d50566c82674f5c4409e3eb Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:03:05 +0100 Subject: [PATCH 43/89] fix examples --- torchrl/trainers/helpers/replay_buffer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchrl/trainers/helpers/replay_buffer.py b/torchrl/trainers/helpers/replay_buffer.py index 4f9c48bf4b9..229a22cbe8e 100644 --- a/torchrl/trainers/helpers/replay_buffer.py +++ b/torchrl/trainers/helpers/replay_buffer.py @@ -35,6 +35,7 @@ def make_replay_buffer( sampler=sampler, pin_memory=device != torch.device("cpu"), prefetch=cfg.buffer_prefetch, + batch_size=cfg.batch_size, ) return buffer From d9ab47770cfaeeb9097a01dea9baa7bf9b7cbeb4 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:12:27 +0100 Subject: [PATCH 44/89] fix dqn updater --- tutorials/sphinx-tutorials/coding_dqn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 21fda673cd2..15d98ad092a 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -546,6 +546,8 @@ def get_loss_module(actor, gamma): # Get model actor, actor_explore = make_model(test_env) loss_module, target_net_updater = get_loss_module(actor, gamma) +target_net_updater.init_() + collector = get_collector( stats, num_collectors, actor_explore, frames_per_batch, total_frames, device ) From fb7d5de0dc2122de9fab64346c99c98cdd3e50a6 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:18:05 +0100 Subject: [PATCH 45/89] fix doc --- docs/source/reference/data.rst | 4 +--- docs/source/reference/envs.rst | 1 - docs/source/reference/objectives.rst | 2 +- torchrl/data/datasets/__init__.py | 1 + torchrl/data/postprocs/postprocs.py | 6 +++--- torchrl/objectives/common.py | 2 +- torchrl/objectives/utils.py | 4 ++-- 7 files changed, 9 insertions(+), 11 deletions(-) diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst index 7a0143e746e..079e5877654 100644 --- a/docs/source/reference/data.rst +++ b/docs/source/reference/data.rst @@ -180,8 +180,6 @@ Here's an example: .. currentmodule:: torchrl.data.datasets -.. currentmodule:: torchrl.data.datasets - .. autosummary:: :toctree: generated/ :template: rl_template.rst @@ -220,7 +218,7 @@ Check the :obj:`torchrl.envs.utils.check_env_specs` method for a sanity check. Utils ----- -.. currentmodule:: torchrl.data.datasets +.. currentmodule:: torchrl.data .. autosummary:: :toctree: generated/ diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst index 8b661bfa391..430dea36996 100644 --- a/docs/source/reference/envs.rst +++ b/docs/source/reference/envs.rst @@ -114,7 +114,6 @@ provides more information on how to design a custom environment from scratch. 
EnvBase GymLikeEnv EnvMetaData - Specs Vectorized envs --------------- diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index 1eb9d17bb16..384117de4c9 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -218,5 +218,5 @@ Utils next_state_value SoftUpdate HardUpdate - ValueFunctions + ValueEstimators default_value_kwargs diff --git a/torchrl/data/datasets/__init__.py b/torchrl/data/datasets/__init__.py index 6fcc35a0d46..81a668648d0 100644 --- a/torchrl/data/datasets/__init__.py +++ b/torchrl/data/datasets/__init__.py @@ -1 +1,2 @@ from .d4rl import D4RLExperienceReplay +from .openml import OpenMLExperienceReplay diff --git a/torchrl/data/postprocs/postprocs.py b/torchrl/data/postprocs/postprocs.py index dba8cafde4a..21f51115d6c 100644 --- a/torchrl/data/postprocs/postprocs.py +++ b/torchrl/data/postprocs/postprocs.py @@ -82,9 +82,9 @@ def _get_reward( class MultiStep(nn.Module): """Multistep reward transform. - Presented in 'Sutton, R. S. 1988. Learning to - predict by the methods of temporal differences. Machine learning 3( - 1):9–44.' + Presented in + + | Sutton, R. S. 1988. Learning to predict by the methods of temporal differences. Machine learning 3(1):9–44. This module maps the "next" observation to the t + n "next" observation. It is an identity transform whenever :attr:`n_steps` is 0. diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 770d3f3e406..9c37b1cbdca 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -392,7 +392,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): this method. Args: - value_type (ValueEstimators): A :class:`torchrl.objectives.utils.ValueFunctions` + value_type (ValueEstimators): A :class:`torchrl.objectives.utils.ValueEstimators` enum type indicating the value function to use. **hyperparams: hyperparameters to use for the value function. If not provided, the value indicated by diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 3daf5e70876..9d393a51d05 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -18,7 +18,7 @@ _GAMMA_LMBDA_DEPREC_WARNING = ( "Passing gamma / lambda parameters through the loss constructor " "is deprecated and will be removed soon. To customize your value function, " - "run `loss_module.make_value_estimator(ValueFunctions., gamma=val)`." + "run `loss_module.make_value_estimator(ValueEstimators., gamma=val)`." ) @@ -45,7 +45,7 @@ def default_value_kwargs(value_type: ValueEstimators): Args: value_type (Enum.value): the value function type, from the - :class:`torchrl.objectives.utils.ValueFunctions` class. + :class:`torchrl.objectives.utils.ValueEstimators` class. 
Examples: >>> kwargs = default_value_kwargs(ValueEstimators.TDLambda) From 180b5b2becceef318c20291c9c198f3e49b5cf1e Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:21:46 +0100 Subject: [PATCH 46/89] print td shape --- torchrl/data/datasets/openml.py | 9 +++++++-- tutorials/sphinx-tutorials/coding_ddpg.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/torchrl/data/datasets/openml.py b/torchrl/data/datasets/openml.py index 78b90793682..76ccb66f601 100644 --- a/torchrl/data/datasets/openml.py +++ b/torchrl/data/datasets/openml.py @@ -8,8 +8,13 @@ import numpy as np from tensordict.tensordict import TensorDict -from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer -from torchrl.data.replay_buffers import Sampler, SamplerWithoutReplacement, Writer +from torchrl.data.replay_buffers import ( + LazyMemmapStorage, + Sampler, + SamplerWithoutReplacement, + TensorDictReplayBuffer, + Writer, +) class OpenMLExperienceReplay(TensorDictReplayBuffer): diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 53fd830f3be..3b4176a817c 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -1063,6 +1063,7 @@ def make_replay_buffer(buffer_size, prefetch=3): # extend the replay buffer with the new data current_frames = tensordict.numel() collected_frames += current_frames + print("Tensordict shape: ", tensordict.shape) replay_buffer.extend(tensordict.cpu()) # optimization steps From 921b91b954493be83d6b7f1c65632ca1035ec0dc Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:33:01 +0100 Subject: [PATCH 47/89] fix recorder --- test/test_trainer.py | 8 ++++---- torchrl/trainers/trainers.py | 17 +++++++++-------- tutorials/sphinx-tutorials/coding_ddpg.py | 1 - 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/test/test_trainer.py b/test/test_trainer.py index 8544efb6b6f..c83460eb753 100644 --- a/test/test_trainer.py +++ b/test/test_trainer.py @@ -859,7 +859,7 @@ def test_recorder(self, N=8): with tempfile.TemporaryDirectory() as folder: logger = TensorboardLogger(exp_name=folder) - recorder = transformed_env_constructor( + environment = transformed_env_constructor( args, video_tag="tmp", norm_obs_only=True, @@ -871,7 +871,7 @@ def test_recorder(self, N=8): record_frames=args.record_frames, frame_skip=args.frame_skip, policy_exploration=None, - recorder=recorder, + environment=environment, record_interval=args.record_interval, ) trainer = mocking_trainer() @@ -933,7 +933,7 @@ def _make_recorder_and_trainer(tmpdirname): raise NotImplementedError trainer = mocking_trainer(file) - recorder = transformed_env_constructor( + environment = transformed_env_constructor( args, video_tag="tmp", norm_obs_only=True, @@ -945,7 +945,7 @@ def _make_recorder_and_trainer(tmpdirname): record_frames=args.record_frames, frame_skip=args.frame_skip, policy_exploration=None, - recorder=recorder, + environment=environment, record_interval=args.record_interval, ) recorder.register(trainer) diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 1608f853ad4..52d58542442 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -1163,10 +1163,11 @@ def __init__( ) -> None: if environment is None and recorder is not None: warnings.warn(self.ENV_DEPREC) + environment = recorder elif environment is not None and recorder is not None: raise ValueError("environment and recorder conflict.") self.policy_exploration = policy_exploration 
- self.recorder = recorder + self.environment = environment self.record_frames = record_frames self.frame_skip = frame_skip self._count = 0 @@ -1189,8 +1190,8 @@ def __call__(self, batch: TensorDictBase) -> Dict: with set_exploration_mode(self.exploration_mode): if isinstance(self.policy_exploration, torch.nn.Module): self.policy_exploration.eval() - self.recorder.eval() - td_record = self.recorder.rollout( + self.environment.eval() + td_record = self.environment.rollout( policy=self.policy_exploration, max_steps=self.record_frames, auto_reset=True, @@ -1199,8 +1200,8 @@ def __call__(self, batch: TensorDictBase) -> Dict: ).clone() if isinstance(self.policy_exploration, torch.nn.Module): self.policy_exploration.train() - self.recorder.train() - self.recorder.transform.dump(suffix=self.suffix) + self.environment.train() + self.environment.transform.dump(suffix=self.suffix) out = {} for key in self.log_keys: @@ -1214,18 +1215,18 @@ def __call__(self, batch: TensorDictBase) -> Dict: out[self.out_keys[key]] = value out["log_pbar"] = self.log_pbar self._count += 1 - self.recorder.close() + self.environment.close() return out def state_dict(self) -> Dict: return { "_count": self._count, - "recorder_state_dict": self.recorder.state_dict(), + "recorder_state_dict": self.environment.state_dict(), } def load_state_dict(self, state_dict: Dict) -> None: self._count = state_dict["_count"] - self.recorder.load_state_dict(state_dict["recorder_state_dict"]) + self.environment.load_state_dict(state_dict["recorder_state_dict"]) def register(self, trainer: Trainer, name: str = "recorder"): trainer.register_module(name, self) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 3b4176a817c..53fd830f3be 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -1063,7 +1063,6 @@ def make_replay_buffer(buffer_size, prefetch=3): # extend the replay buffer with the new data current_frames = tensordict.numel() collected_frames += current_frames - print("Tensordict shape: ", tensordict.shape) replay_buffer.extend(tensordict.cpu()) # optimization steps From 8984654d4bbaf1d833955af32116dace1e6359e4 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:39:33 +0100 Subject: [PATCH 48/89] fix examples --- examples/discrete_sac/discrete_sac.py | 6 +++--- examples/iql/iql_online.py | 13 +++++++++---- examples/td3/td3.py | 14 ++++++++++---- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/examples/discrete_sac/discrete_sac.py b/examples/discrete_sac/discrete_sac.py index 6fc101ff533..987571747f6 100644 --- a/examples/discrete_sac/discrete_sac.py +++ b/examples/discrete_sac/discrete_sac.py @@ -44,7 +44,7 @@ def make_replay_buffer( batch_size=256, buffer_scratch_dir="/tmp/", device="cpu", - make_replay_buffer=3, + prefetch=3, ): if prb: replay_buffer = TensorDictPrioritizedReplayBuffer( @@ -52,7 +52,7 @@ def make_replay_buffer( beta=0.5, pin_memory=False, batch_size=batch_size, - prefetch=make_replay_buffer, + prefetch=prefetch, storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, @@ -63,7 +63,7 @@ def make_replay_buffer( replay_buffer = TensorDictReplayBuffer( pin_memory=False, batch_size=batch_size, - prefetch=make_replay_buffer, + prefetch=prefetch, storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, diff --git a/examples/iql/iql_online.py b/examples/iql/iql_online.py index 4dcc5bea747..1512f471f10 100644 --- a/examples/iql/iql_online.py +++ 
b/examples/iql/iql_online.py @@ -36,33 +36,36 @@ def env_maker(env_name, frame_skip=1, device="cpu", from_pixels=False): def make_replay_buffer( + batch_size, prb=False, buffer_size=1000000, buffer_scratch_dir="/tmp/", device="cpu", - make_replay_buffer=3, + prefetch=3, ): if prb: replay_buffer = TensorDictPrioritizedReplayBuffer( alpha=0.7, beta=0.5, pin_memory=False, - prefetch=make_replay_buffer, + prefetch=prefetch, storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, device=device, ), + batch_size=batch_size, ) else: replay_buffer = TensorDictReplayBuffer( pin_memory=False, - prefetch=make_replay_buffer, + prefetch=prefetch, storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, device=device, ), + batch_size=batch_size, ) return replay_buffer @@ -218,7 +221,9 @@ def env_factory(num_workers): collector.set_seed(cfg.seed) # Make Replay Buffer - replay_buffer = make_replay_buffer(buffer_size=cfg.buffer_size, device=device) + replay_buffer = make_replay_buffer( + buffer_size=cfg.buffer_size, device=device, batch_size=cfg.batch_size + ) # Optimizers params = list(loss_module.parameters()) diff --git a/examples/td3/td3.py b/examples/td3/td3.py index 659da599240..a285c29acef 100644 --- a/examples/td3/td3.py +++ b/examples/td3/td3.py @@ -60,33 +60,36 @@ def apply_env_transforms(env, reward_scaling=1.0): def make_replay_buffer( + batch_size, prb=False, buffer_size=1000000, buffer_scratch_dir="/tmp/", device="cpu", - make_replay_buffer=3, + prefetch=3, ): if prb: replay_buffer = TensorDictPrioritizedReplayBuffer( alpha=0.7, beta=0.5, pin_memory=False, - prefetch=make_replay_buffer, + prefetch=prefetch, storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, device=device, ), + batch_size=batch_size, ) else: replay_buffer = TensorDictReplayBuffer( pin_memory=False, - prefetch=make_replay_buffer, + prefetch=prefetch, storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, device=device, ), + batch_size=batch_size, ) return replay_buffer @@ -239,7 +242,10 @@ def main(cfg: "DictConfig"): # noqa: F821 # Make Replay Buffer replay_buffer = make_replay_buffer( - prb=cfg.prb, buffer_size=cfg.buffer_size, device=device + batch_size=cfg.batch_size, + prb=cfg.prb, + buffer_size=cfg.buffer_size, + device=device, ) # Optimizers From a3f76d18f5d776e86826fcacd5b3fbeba99cda89 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:46:14 +0100 Subject: [PATCH 49/89] tmp --- tutorials/sphinx-tutorials/coding_ddpg.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 53fd830f3be..b1b74764c44 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -68,7 +68,7 @@ from tensordict.nn import TensorDictModule from tensordict.tensordict import TensorDict, TensorDictBase from torch import nn, optim -from torchrl.collectors import MultiaSyncDataCollector +from torchrl.collectors import MultiaSyncDataCollector, SyncDataCollector from torchrl.data import CompositeSpec, TensorDictReplayBuffer from torchrl.data.postprocs import MultiStep from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler @@ -987,8 +987,10 @@ def make_replay_buffer(buffer_size, prefetch=3): else: multistep = None -collector = MultiaSyncDataCollector( - create_env_fn=[create_env_fn, create_env_fn], +warnings.warn("Change collector!!") + +collector = SyncDataCollector( + 
create_env_fn=create_env_fn, policy=actor_model_explore, total_frames=total_frames, max_frames_per_traj=max_frames_per_traj, @@ -998,7 +1000,7 @@ def make_replay_buffer(buffer_size, prefetch=3): postproc=multistep, split_trajs=True, device=device, # device for execution - storing_devices=[device, device], # device where data will be stored and passed + storing_device=device, # device where data will be stored and passed update_at_each_batch=False, exploration_mode="random", ) From a10900acc6c94bded874b330f081798735761e40 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:46:55 +0100 Subject: [PATCH 50/89] tmp --- tutorials/sphinx-tutorials/coding_ddpg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index b1b74764c44..288bc8a508f 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -818,7 +818,8 @@ def make_replay_buffer(buffer_size, prefetch=3): ) # Number of environments in each data collector -env_per_collector = 2 +warnings.warn("More envs!") +env_per_collector = 1 # Total frames we will use during training. Scale up to 500K - 1M for a more # meaningful training From 2e65eef20f5d8483afb689e359d8d473f449a772 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:49:31 +0100 Subject: [PATCH 51/89] tmp --- tutorials/sphinx-tutorials/coding_ddpg.py | 2062 ++++++++++----------- 1 file changed, 1031 insertions(+), 1031 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 288bc8a508f..81542a4c1d0 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -60,1089 +60,1089 @@ warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore - -import numpy as np -import torch.cuda -import tqdm -from matplotlib import pyplot as plt -from tensordict.nn import TensorDictModule -from tensordict.tensordict import TensorDict, TensorDictBase -from torch import nn, optim -from torchrl.collectors import MultiaSyncDataCollector, SyncDataCollector -from torchrl.data import CompositeSpec, TensorDictReplayBuffer -from torchrl.data.postprocs import MultiStep -from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler -from torchrl.data.replay_buffers.storages import LazyMemmapStorage -from torchrl.envs import ( - CatTensors, - DoubleToFloat, - EnvCreator, - ObservationNorm, - ParallelEnv, -) -from torchrl.envs.libs.dm_control import DMControlEnv -from torchrl.envs.libs.gym import GymEnv -from torchrl.envs.transforms import RewardScaling, TransformedEnv -from torchrl.envs.utils import set_exploration_mode -from torchrl.modules import ( - Actor, - ActorCriticWrapper, - MLP, - OrnsteinUhlenbeckProcessWrapper, - ValueOperator, -) -from torchrl.objectives.utils import distance_loss, SoftUpdate -from torchrl.trainers import Recorder - -############################################################################### -# torchrl :class:`torchrl.objectives.LossModule` -# ---------------------------------------------- -# -# TorchRL provides a series of losses to use in your training scripts. -# The aim is to have losses that are easily reusable/swappable and that have -# a simple signature. 
-# -# The main characteristics of TorchRL losses are: -# -# - they are stateful objects: they contain a copy of the trainable parameters -# such that ``loss_module.parameters()`` gives whatever is needed to train the -# algorithm. -# - They follow the ``tensordict`` convention: the :meth:`torch.nn.Module.forward` -# method will receive a tensordict as input that contains all the necessary -# information to return a loss value. -# -# >>> data = replay_buffer.sample() -# >>> loss_dict = loss_module(data) -# -# - They output a :class:`tensordict.TensorDict` instance with the loss values -# written under a ``"loss_"`` where ``smth`` is a string describing the -# loss. Additional keys in the tensordict may be useful metrics to log during -# training time. -# .. note:: -# The reason we return independent losses is to let the user use a different -# optimizer for different sets of parameters for instance. Summing the losses -# can be simply done via -# -# >>> loss_val = sum(loss for key, loss in loss_dict.items() if key.startswith("loss_")) -# -# The ``__init__`` method -# ~~~~~~~~~~~~~~~~~~~~~~~ -# -# The parent class of all losses is :class:`torchrl.objectives.LossModule`. -# As many other components of the library, its :meth:`torchrl.objectives.LossModule.forward` method expects -# as input a :class:`tensordict.TensorDict` instance sampled from an experience -# replay buffer, or any similar data structure. Using this format makes it -# possible to re-use the module across -# modalities, or in complex settings where the model needs to read multiple -# entries for instance. In other words, it allows us to code a loss module that -# is oblivious to the data type that is being given to is and that focuses on -# running the elementary steps of the loss function and only those. -# -# To keep the tutorial as didactic as we can, we'll be displaying each method -# of the class independently and we'll be populating the class at a later -# stage. -# -# Let us start with the :meth:`torchrl.objectives.LossModule.__init__` -# method. DDPG aims at solving a control task with a simple strategy: -# training a policy to output actions that maximise the value predicted by -# a value network. Hence, our loss module needs to receive two networks in its -# constructor: an actor and a value networks. We expect both of these to be -# tensordict-compatible objects, such as -# :class:`tensordict.nn.TensorDictModule`. -# Our loss function will need to compute a target value and fit the value -# network to this, and generate an action and fit the policy such that its -# value estimate is maximised. -# -# The crucial step of the :meth:`LossModule.__init__` method is the call to -# :meth:`torchrl.LossModule.convert_to_functional`. This method will extract -# the parameters from the module and convert it to a functional module. -# Strictly speaking, this is not necessary and one may perfectly code all -# the losses without it. However, we encourage its usage for the following -# reason. -# -# The reason TorchRL does this is that RL algorithms often execute the same -# model with different sets of parameters, called "trainable" and "target" -# parameters. -# The "trainable" parameters are those that the optimizer needs to fit. The -# "target" parameters are usually a copy of the formers with some time lag -# (absolute or diluted through a moving average). -# These target parameters are used to compute the value associated with the -# next observation. 
One the advantages of using a set of target parameters -# for the value model that do not match exactly the current configuration is -# that they provide a pessimistic bound on the value function being computed. -# Pay attention to the ``create_target_params`` keyword argument below: this -# argument tells the :meth:`torchrl.objectives.LossModule.convert_to_functional` -# method to create a set of target parameters in the loss module to be used -# for target value computation. If this is set to ``False`` (see the actor network -# for instance) the ``target_actor_network_params`` attribute will still be -# accessible but this will just return a **detached** version of the -# actor parameters. -# -# Later, we will see how the target parameters should be updated in torchrl. -# - - -def _init( - self, - actor_network: TensorDictModule, - value_network: TensorDictModule, -) -> None: - super(type(self), self).__init__() - - self.convert_to_functional( - actor_network, - "actor_network", - create_target_params=False, +if __name__ == "__main__": + import numpy as np + import torch.cuda + import tqdm + from matplotlib import pyplot as plt + from tensordict.nn import TensorDictModule + from tensordict.tensordict import TensorDict, TensorDictBase + from torch import nn, optim + from torchrl.collectors import MultiaSyncDataCollector, SyncDataCollector + from torchrl.data import CompositeSpec, TensorDictReplayBuffer + from torchrl.data.postprocs import MultiStep + from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler + from torchrl.data.replay_buffers.storages import LazyMemmapStorage + from torchrl.envs import ( + CatTensors, + DoubleToFloat, + EnvCreator, + ObservationNorm, + ParallelEnv, ) - self.convert_to_functional( - value_network, - "value_network", - create_target_params=True, - compare_against=list(actor_network.parameters()), + from torchrl.envs.libs.dm_control import DMControlEnv + from torchrl.envs.libs.gym import GymEnv + from torchrl.envs.transforms import RewardScaling, TransformedEnv + from torchrl.envs.utils import set_exploration_mode + from torchrl.modules import ( + Actor, + ActorCriticWrapper, + MLP, + OrnsteinUhlenbeckProcessWrapper, + ValueOperator, ) - - self.actor_in_keys = actor_network.in_keys - - # Since the value we'll be using is based on the actor and value network, - # we put them together in a single actor-critic container. - actor_critic = ActorCriticWrapper(actor_network, value_network) - self.actor_critic = actor_critic - self.loss_funtion = "l2" - - -############################################################################### -# The value estimator loss method -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# In many RL algorithm, the value network (or Q-value network) is trained based -# on an empirical value estimate. This can be bootstrapped (TD(0), low -# variance, high bias), meaning -# that the target value is obtained using the next reward and nothing else, or -# a Monte-Carlo estimate can be obtained (TD(1)) in which case the whole -# sequence of upcoming rewards will be used (high variance, low bias). An -# intermediate estimator (TD(:math:`\lambda`)) can also be used to compromise -# bias and variance. -# TorchRL makes it easy to use one or the other estimator via the -# :class:`torchrl.objectives.utils.ValueEstimators` Enum class, which contains -# pointers to all the value estimators implemented. Let us define the default -# value function here. 
We will take the simplest version (TD(0)), and show later -# on how this can be changed. - -from torchrl.objectives.utils import ValueEstimators - -default_value_estimator = ValueEstimators.TD0 - -############################################################################### -# We also need to give some instructions to DDPG on how to build the value -# estimator, depending on the user query. Depending on the estimator provided, -# we will build the corresponding module to be used at train time: - -from torchrl.objectives.utils import default_value_kwargs -from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator - - -def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): - hp = dict(default_value_kwargs(value_type)) - if hasattr(self, "gamma"): - hp["gamma"] = self.gamma - hp.update(hyperparams) - value_key = "state_action_value" - if value_type == ValueEstimators.TD1: - self._value_estimator = TD1Estimator( - value_network=self.actor_critic, value_key=value_key, **hp - ) - elif value_type == ValueEstimators.TD0: - self._value_estimator = TD0Estimator( - value_network=self.actor_critic, value_key=value_key, **hp - ) - elif value_type == ValueEstimators.GAE: - raise NotImplementedError( - f"Value type {value_type} it not implemented for loss {type(self)}." + from torchrl.objectives.utils import distance_loss, SoftUpdate + from torchrl.trainers import Recorder + + ############################################################################### + # torchrl :class:`torchrl.objectives.LossModule` + # ---------------------------------------------- + # + # TorchRL provides a series of losses to use in your training scripts. + # The aim is to have losses that are easily reusable/swappable and that have + # a simple signature. + # + # The main characteristics of TorchRL losses are: + # + # - they are stateful objects: they contain a copy of the trainable parameters + # such that ``loss_module.parameters()`` gives whatever is needed to train the + # algorithm. + # - They follow the ``tensordict`` convention: the :meth:`torch.nn.Module.forward` + # method will receive a tensordict as input that contains all the necessary + # information to return a loss value. + # + # >>> data = replay_buffer.sample() + # >>> loss_dict = loss_module(data) + # + # - They output a :class:`tensordict.TensorDict` instance with the loss values + # written under a ``"loss_"`` where ``smth`` is a string describing the + # loss. Additional keys in the tensordict may be useful metrics to log during + # training time. + # .. note:: + # The reason we return independent losses is to let the user use a different + # optimizer for different sets of parameters for instance. Summing the losses + # can be simply done via + # + # >>> loss_val = sum(loss for key, loss in loss_dict.items() if key.startswith("loss_")) + # + # The ``__init__`` method + # ~~~~~~~~~~~~~~~~~~~~~~~ + # + # The parent class of all losses is :class:`torchrl.objectives.LossModule`. + # As many other components of the library, its :meth:`torchrl.objectives.LossModule.forward` method expects + # as input a :class:`tensordict.TensorDict` instance sampled from an experience + # replay buffer, or any similar data structure. Using this format makes it + # possible to re-use the module across + # modalities, or in complex settings where the model needs to read multiple + # entries for instance. 
In other words, it allows us to code a loss module that + # is oblivious to the data type that is being given to is and that focuses on + # running the elementary steps of the loss function and only those. + # + # To keep the tutorial as didactic as we can, we'll be displaying each method + # of the class independently and we'll be populating the class at a later + # stage. + # + # Let us start with the :meth:`torchrl.objectives.LossModule.__init__` + # method. DDPG aims at solving a control task with a simple strategy: + # training a policy to output actions that maximise the value predicted by + # a value network. Hence, our loss module needs to receive two networks in its + # constructor: an actor and a value networks. We expect both of these to be + # tensordict-compatible objects, such as + # :class:`tensordict.nn.TensorDictModule`. + # Our loss function will need to compute a target value and fit the value + # network to this, and generate an action and fit the policy such that its + # value estimate is maximised. + # + # The crucial step of the :meth:`LossModule.__init__` method is the call to + # :meth:`torchrl.LossModule.convert_to_functional`. This method will extract + # the parameters from the module and convert it to a functional module. + # Strictly speaking, this is not necessary and one may perfectly code all + # the losses without it. However, we encourage its usage for the following + # reason. + # + # The reason TorchRL does this is that RL algorithms often execute the same + # model with different sets of parameters, called "trainable" and "target" + # parameters. + # The "trainable" parameters are those that the optimizer needs to fit. The + # "target" parameters are usually a copy of the formers with some time lag + # (absolute or diluted through a moving average). + # These target parameters are used to compute the value associated with the + # next observation. One the advantages of using a set of target parameters + # for the value model that do not match exactly the current configuration is + # that they provide a pessimistic bound on the value function being computed. + # Pay attention to the ``create_target_params`` keyword argument below: this + # argument tells the :meth:`torchrl.objectives.LossModule.convert_to_functional` + # method to create a set of target parameters in the loss module to be used + # for target value computation. If this is set to ``False`` (see the actor network + # for instance) the ``target_actor_network_params`` attribute will still be + # accessible but this will just return a **detached** version of the + # actor parameters. + # + # Later, we will see how the target parameters should be updated in torchrl. 
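    # As a preview, the "soft" update used at the end of this tutorial amounts
    # to a Polyak average of the trainable parameters into the target ones.
    # A minimal sketch in plain PyTorch (toy networks; we assume ``eps`` plays
    # the same role as the value later passed to ``SoftUpdate``, i.e. the
    # fraction of the target parameters kept at each update):
    #
    # .. code-block:: python
    #
    #    import torch
    #    from torch import nn
    #
    #    value_net = nn.Linear(4, 1)
    #    target_net = nn.Linear(4, 1)
    #    target_net.load_state_dict(value_net.state_dict())
    #
    #    eps = 0.98
    #    with torch.no_grad():
    #        for p, p_target in zip(value_net.parameters(), target_net.parameters()):
    #            # the target lags behind the trained parameters
    #            p_target.mul_(eps).add_((1 - eps) * p)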
+ # + + + def _init( + self, + actor_network: TensorDictModule, + value_network: TensorDictModule, + ) -> None: + super(type(self), self).__init__() + + self.convert_to_functional( + actor_network, + "actor_network", + create_target_params=False, ) - elif value_type == ValueEstimators.TDLambda: - self._value_estimator = TDLambdaEstimator( - value_network=self.actor_critic, value_key=value_key, **hp + self.convert_to_functional( + value_network, + "value_network", + create_target_params=True, + compare_against=list(actor_network.parameters()), ) - else: - raise NotImplementedError(f"Unknown value type {value_type}") - -############################################################################### -# The ``make_value_estimator`` method can but does not need to be called: if -# not, the :class:`torchrl.objectives.LossModule` will query this method with -# its default estimator. -# -# The actor loss method -# ~~~~~~~~~~~~~~~~~~~~~ -# -# The central piece of an RL algorithm is the training loss for the actor. -# In the case of DDPG, this function is quite simple: we just need to compute -# the value associated with an action computed using the policy and optimize -# the actor weights to maximise this value. -# -# When computing this value, we must make sure to take the value parameters out -# of the graph, otherwise the actor and value loss will be mixed up. -# For this, the :func:`torchrl.objectives.utils.hold_out_params` function -# can be used. - -from torchrl.objectives.utils import hold_out_params - - -def _loss_actor( - self, - tensordict, -) -> torch.Tensor: - td_copy = tensordict.select(*self.actor_in_keys).detach() - # Get an action from the actor network - td_copy = self.actor_network( - td_copy, - params=self.actor_network_params, - ) - # get the value associated with that action - with hold_out_params(self.value_network_params) as params: - td_copy = self.value_network( + self.actor_in_keys = actor_network.in_keys + + # Since the value we'll be using is based on the actor and value network, + # we put them together in a single actor-critic container. + actor_critic = ActorCriticWrapper(actor_network, value_network) + self.actor_critic = actor_critic + self.loss_funtion = "l2" + + + ############################################################################### + # The value estimator loss method + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # In many RL algorithm, the value network (or Q-value network) is trained based + # on an empirical value estimate. This can be bootstrapped (TD(0), low + # variance, high bias), meaning + # that the target value is obtained using the next reward and nothing else, or + # a Monte-Carlo estimate can be obtained (TD(1)) in which case the whole + # sequence of upcoming rewards will be used (high variance, low bias). An + # intermediate estimator (TD(:math:`\lambda`)) can also be used to compromise + # bias and variance. + # TorchRL makes it easy to use one or the other estimator via the + # :class:`torchrl.objectives.utils.ValueEstimators` Enum class, which contains + # pointers to all the value estimators implemented. Let us define the default + # value function here. We will take the simplest version (TD(0)), and show later + # on how this can be changed. 
+ + from torchrl.objectives.utils import ValueEstimators + + default_value_estimator = ValueEstimators.TD0 + + ############################################################################### + # We also need to give some instructions to DDPG on how to build the value + # estimator, depending on the user query. Depending on the estimator provided, + # we will build the corresponding module to be used at train time: + + from torchrl.objectives.utils import default_value_kwargs + from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator + + + def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma + hp.update(hyperparams) + value_key = "state_action_value" + if value_type == ValueEstimators.TD1: + self._value_estimator = TD1Estimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueEstimators.TD0: + self._value_estimator = TD0Estimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueEstimators.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." + ) + elif value_type == ValueEstimators.TDLambda: + self._value_estimator = TDLambdaEstimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") + + + ############################################################################### + # The ``make_value_estimator`` method can but does not need to be called: if + # not, the :class:`torchrl.objectives.LossModule` will query this method with + # its default estimator. + # + # The actor loss method + # ~~~~~~~~~~~~~~~~~~~~~ + # + # The central piece of an RL algorithm is the training loss for the actor. + # In the case of DDPG, this function is quite simple: we just need to compute + # the value associated with an action computed using the policy and optimize + # the actor weights to maximise this value. + # + # When computing this value, we must make sure to take the value parameters out + # of the graph, otherwise the actor and value loss will be mixed up. + # For this, the :func:`torchrl.objectives.utils.hold_out_params` function + # can be used. + + from torchrl.objectives.utils import hold_out_params + + + def _loss_actor( + self, + tensordict, + ) -> torch.Tensor: + td_copy = tensordict.select(*self.actor_in_keys).detach() + # Get an action from the actor network + td_copy = self.actor_network( td_copy, - params=params, + params=self.actor_network_params, ) - return -td_copy.get("state_action_value") - - -############################################################################### -# The value loss method -# ~~~~~~~~~~~~~~~~~~~~~ -# -# We now need to optimize our value network parameters. -# To do this, we will rely on the value estimator of our class: -# - - -def _loss_value( - self, - tensordict, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - td_copy = tensordict.detach() - - # V(s, a) - self.value_network(td_copy, params=self.value_network_params) - pred_val = td_copy.get("state_action_value").squeeze(-1) - - # we manually reconstruct the parameters of the actor-critic, where the first - # set of parameters belongs to the actor and the second to the value function. 
- target_params = TensorDict( - { - "module": { - "0": self.target_actor_network_params, - "1": self.target_value_network_params, - } - }, - batch_size=self.target_actor_network_params.batch_size, - device=self.target_actor_network_params.device, - ) - with set_exploration_mode("mode"): # we make sure that no exploration is performed - target_value = self.value_estimator.value_estimate( - tensordict, target_params=target_params - ).squeeze(-1) - - # td_error = pred_val - target_value - loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) - td_error = (pred_val - target_value).pow(2) - - return loss_value, td_error, pred_val, target_value - - -############################################################################### -# Putting things together in a forward call -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# The only missing piece is the forward method, which will glue together the -# value and actor loss, collect the cost values and write them in a tensordict -# delivered to the user. - - -def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: - if not input_tensordict.device == self.device: - raise RuntimeError( - f"Got device={input_tensordict.device} but " - f"actor_network.device={self.device} (self.device={self.device})" + # get the value associated with that action + with hold_out_params(self.value_network_params) as params: + td_copy = self.value_network( + td_copy, + params=params, + ) + return -td_copy.get("state_action_value") + + + ############################################################################### + # The value loss method + # ~~~~~~~~~~~~~~~~~~~~~ + # + # We now need to optimize our value network parameters. + # To do this, we will rely on the value estimator of our class: + # + + + def _loss_value( + self, + tensordict, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + td_copy = tensordict.detach() + + # V(s, a) + self.value_network(td_copy, params=self.value_network_params) + pred_val = td_copy.get("state_action_value").squeeze(-1) + + # we manually reconstruct the parameters of the actor-critic, where the first + # set of parameters belongs to the actor and the second to the value function. 
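        # (A note on the structure below: the nested ("module", "0") and
        # ("module", "1") entries are assumed to mirror how the functional
        # ``ActorCriticWrapper`` built in ``_init`` stores its sub-modules'
        # parameters, the actor at index 0 and the value network at index 1.)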
+ target_params = TensorDict( + { + "module": { + "0": self.target_actor_network_params, + "1": self.target_value_network_params, + } + }, + batch_size=self.target_actor_network_params.batch_size, + device=self.target_actor_network_params.device, ) + with set_exploration_mode("mode"): # we make sure that no exploration is performed + target_value = self.value_estimator.value_estimate( + tensordict, target_params=target_params + ).squeeze(-1) - loss_value, td_error, pred_val, target_value = self.loss_value( - input_tensordict, - ) - td_error = td_error.detach() - td_error = td_error.unsqueeze(input_tensordict.ndimension()) - if input_tensordict.device is not None: - td_error = td_error.to(input_tensordict.device) - input_tensordict.set( - "td_error", - td_error, - inplace=True, - ) - loss_actor = self.loss_actor(input_tensordict) - return TensorDict( - source={ - "loss_actor": loss_actor.mean(), - "loss_value": loss_value.mean(), - "pred_value": pred_val.mean().detach(), - "target_value": target_value.mean().detach(), - "pred_value_max": pred_val.max().detach(), - "target_value_max": target_value.max().detach(), - }, - batch_size=[], - ) + # td_error = pred_val - target_value + loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) + td_error = (pred_val - target_value).pow(2) + return loss_value, td_error, pred_val, target_value -class DDPGLoss(LossModule): - default_value_estimator = default_value_estimator - make_value_estimator = make_value_estimator - __init__ = _init - forward = _forward - loss_value = _loss_value - loss_actor = _loss_actor + ############################################################################### + # Putting things together in a forward call + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # The only missing piece is the forward method, which will glue together the + # value and actor loss, collect the cost values and write them in a tensordict + # delivered to the user. -############################################################################### -# Now that we have our loss, we can use it to train a policy to solve a -# control task. -# -# Environment -# ----------- -# -# In most algorithms, the first thing that needs to be taken care of is the -# construction of the environment as it conditions the remainder of the -# training script. -# -# For this example, we will be using the ``"cheetah"`` task. The goal is to make -# a half-cheetah run as fast as possible. -# -# In TorchRL, one can create such a task by relying on dm_control or gym: -# -# .. code-block:: python -# -# env = GymEnv("HalfCheetah-v4") -# -# or -# -# .. code-block:: python -# -# env = DMControlEnv("cheetah", "run") -# -# By default, these environment disable rendering. Training from states is -# usually easier than training from images. To keep things simple, we focus -# on learning from states only. To pass the pixels to the tensordicts that -# are collected by :func:`env.step()`, simply pass the ``from_pixels=True`` -# argument to the constructor: -# -# .. code-block:: python -# -# env = GymEnv("HalfCheetah-v4", from_pixels=True, pixels_only=True) -# -# We write a :func:`make_env` helper function that will create an environment -# with either one of the two backends considered above (dm-control or gym). 
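# Before writing the helper, a quick sanity check of the raw environment can
# be useful. The sketch below assumes the gym backend (and the MuJoCo
# binaries) is installed; ``rollout(3)`` returns a 3-step random-policy
# trajectory as a TensorDict.
#
# .. code-block:: python
#
#    from torchrl.envs.libs.gym import GymEnv
#
#    env = GymEnv("HalfCheetah-v4")
#    print(env.rollout(3))
#    env.close()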
-# - -env_library = None -env_name = None - - -def make_env(): - """Create a base env.""" - global env_library - global env_name - - if backend == "dm_control": - env_name = "cheetah" - env_task = "run" - env_args = (env_name, env_task) - env_library = DMControlEnv - elif backend == "gym": - env_name = "HalfCheetah-v4" - env_args = (env_name,) - env_library = GymEnv - else: - raise NotImplementedError - - env_kwargs = { - "device": device, - "frame_skip": frame_skip, - "from_pixels": from_pixels, - "pixels_only": from_pixels, - } - env = env_library(*env_args, **env_kwargs) - return env - - -############################################################################### -# Transforms -# ~~~~~~~~~~ -# -# Now that we have a base environment, we may want to modify its representation -# to make it more policy-friendly. In TorchRL, transforms are appended to the -# base environment in a specialized :class:`torchr.envs.TransformedEnv` class. -# -# - It is common in DDPG to rescale the reward using some heuristic value. We -# will multiply the reward by 5 in this example. -# -# - If we are using :mod:`dm_control`, it is also important to build an interface -# between the simulator which works with double precision numbers, and our -# script which presumably uses single precision ones. This transformation goes -# both ways: when calling :func:`env.step`, our actions will need to be -# represented in double precision, and the output will need to be transformed -# to single precision. -# The :class:`torchrl.envs.DoubleToFloat` transform does exactly this: the -# ``in_keys`` list refers to the keys that will need to be transformed from -# double to float, while the ``in_keys_inv`` refers to those that need to -# be transformed to double before being passed to the environment. -# -# - We concatenate the state keys together using the :class:`torchrl.envs.CatTensors` -# transform. -# -# - Finally, we also leave the possibility of normalizing the states: we will -# take care of computing the normalizing constants later on. -# - + def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: + if not input_tensordict.device == self.device: + raise RuntimeError( + f"Got device={input_tensordict.device} but " + f"actor_network.device={self.device} (self.device={self.device})" + ) -def make_transformed_env( - env, -): - """Apply transforms to the env (such as reward scaling and state normalization).""" + loss_value, td_error, pred_val, target_value = self.loss_value( + input_tensordict, + ) + td_error = td_error.detach() + td_error = td_error.unsqueeze(input_tensordict.ndimension()) + if input_tensordict.device is not None: + td_error = td_error.to(input_tensordict.device) + input_tensordict.set( + "td_error", + td_error, + inplace=True, + ) + loss_actor = self.loss_actor(input_tensordict) + return TensorDict( + source={ + "loss_actor": loss_actor.mean(), + "loss_value": loss_value.mean(), + "pred_value": pred_val.mean().detach(), + "target_value": target_value.mean().detach(), + "pred_value_max": pred_val.max().detach(), + "target_value_max": target_value.max().detach(), + }, + batch_size=[], + ) - env = TransformedEnv(env) - # we append transforms one by one, although we might as well create the - # transformed environment using the `env = TransformedEnv(base_env, transforms)` - # syntax. 
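    # For instance, an equivalent construction (sketched, with the same
    # transforms as below) would be:
    #
    #   from torchrl.envs.transforms import Compose
    #   env = TransformedEnv(
    #       base_env, Compose(RewardScaling(loc=0.0, scale=reward_scaling), ...)
    #   )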
- env.append_transform(RewardScaling(loc=0.0, scale=reward_scaling)) + class DDPGLoss(LossModule): + default_value_estimator = default_value_estimator + make_value_estimator = make_value_estimator + + __init__ = _init + forward = _forward + loss_value = _loss_value + loss_actor = _loss_actor + + + ############################################################################### + # Now that we have our loss, we can use it to train a policy to solve a + # control task. + # + # Environment + # ----------- + # + # In most algorithms, the first thing that needs to be taken care of is the + # construction of the environment as it conditions the remainder of the + # training script. + # + # For this example, we will be using the ``"cheetah"`` task. The goal is to make + # a half-cheetah run as fast as possible. + # + # In TorchRL, one can create such a task by relying on dm_control or gym: + # + # .. code-block:: python + # + # env = GymEnv("HalfCheetah-v4") + # + # or + # + # .. code-block:: python + # + # env = DMControlEnv("cheetah", "run") + # + # By default, these environment disable rendering. Training from states is + # usually easier than training from images. To keep things simple, we focus + # on learning from states only. To pass the pixels to the tensordicts that + # are collected by :func:`env.step()`, simply pass the ``from_pixels=True`` + # argument to the constructor: + # + # .. code-block:: python + # + # env = GymEnv("HalfCheetah-v4", from_pixels=True, pixels_only=True) + # + # We write a :func:`make_env` helper function that will create an environment + # with either one of the two backends considered above (dm-control or gym). + # + + env_library = None + env_name = None + + + def make_env(): + """Create a base env.""" + global env_library + global env_name + + if backend == "dm_control": + env_name = "cheetah" + env_task = "run" + env_args = (env_name, env_task) + env_library = DMControlEnv + elif backend == "gym": + env_name = "HalfCheetah-v4" + env_args = (env_name,) + env_library = GymEnv + else: + raise NotImplementedError + + env_kwargs = { + "device": device, + "frame_skip": frame_skip, + "from_pixels": from_pixels, + "pixels_only": from_pixels, + } + env = env_library(*env_args, **env_kwargs) + return env + + + ############################################################################### + # Transforms + # ~~~~~~~~~~ + # + # Now that we have a base environment, we may want to modify its representation + # to make it more policy-friendly. In TorchRL, transforms are appended to the + # base environment in a specialized :class:`torchr.envs.TransformedEnv` class. + # + # - It is common in DDPG to rescale the reward using some heuristic value. We + # will multiply the reward by 5 in this example. + # + # - If we are using :mod:`dm_control`, it is also important to build an interface + # between the simulator which works with double precision numbers, and our + # script which presumably uses single precision ones. This transformation goes + # both ways: when calling :func:`env.step`, our actions will need to be + # represented in double precision, and the output will need to be transformed + # to single precision. + # The :class:`torchrl.envs.DoubleToFloat` transform does exactly this: the + # ``in_keys`` list refers to the keys that will need to be transformed from + # double to float, while the ``in_keys_inv`` refers to those that need to + # be transformed to double before being passed to the environment. 
+ # + # - We concatenate the state keys together using the :class:`torchrl.envs.CatTensors` + # transform. + # + # - Finally, we also leave the possibility of normalizing the states: we will + # take care of computing the normalizing constants later on. + # + + + def make_transformed_env( + env, + ): + """Apply transforms to the env (such as reward scaling and state normalization).""" + + env = TransformedEnv(env) + + # we append transforms one by one, although we might as well create the + # transformed environment using the `env = TransformedEnv(base_env, transforms)` + # syntax. + env.append_transform(RewardScaling(loc=0.0, scale=reward_scaling)) + + double_to_float_list = [] + double_to_float_inv_list = [] + if env_library is DMControlEnv: + # DMControl requires double-precision + double_to_float_list += [ + "reward", + "action", + ] + double_to_float_inv_list += ["action"] + + # We concatenate all states into a single "observation_vector" + # even if there is a single tensor, it'll be renamed in "observation_vector". + # This facilitates the downstream operations as we know the name of the + # output tensor. + # In some environments (not half-cheetah), there may be more than one + # observation vector: in this case this code snippet will concatenate them + # all. + selected_keys = list(env.observation_spec.keys()) + out_key = "observation_vector" + env.append_transform(CatTensors(in_keys=selected_keys, out_key=out_key)) + + # we normalize the states, but for now let's just instantiate a stateless + # version of the transform + env.append_transform(ObservationNorm(in_keys=[out_key], standard_normal=True)) + + double_to_float_list.append(out_key) + env.append_transform( + DoubleToFloat( + in_keys=double_to_float_list, in_keys_inv=double_to_float_inv_list + ) + ) - double_to_float_list = [] - double_to_float_inv_list = [] - if env_library is DMControlEnv: - # DMControl requires double-precision - double_to_float_list += [ - "reward", - "action", + return env + + + ############################################################################### + # Normalization of the observations + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # To compute the normalizing statistics, we run an arbitrary number of random + # steps in the environment and compute the mean and standard deviation of the + # collected observations. The :func:`ObservationNorm.init_stats()` method can + # be used for this purpose. To get the summary statistics, we create a dummy + # environment and run it for a given number of steps, collect data over a given + # number of steps and compute its summary statistics. + # + + + def get_env_stats(): + """Gets the stats of an environment.""" + proof_env = make_transformed_env(make_env()) + proof_env.set_seed(seed) + t = proof_env.transform[2] + t.init_stats(init_env_steps) + transform_state_dict = t.state_dict() + proof_env.close() + return transform_state_dict + + + ############################################################################### + # Parallel execution + # ~~~~~~~~~~~~~~~~~~ + # + # The following helper function allows us to run environments in parallel. + # Running environments in parallel can significantly speed up the collection + # throughput. When using transformed environment, we need to choose whether we + # want to execute the transform individually for each environment, or + # centralize the data and transform it in batch. Both approaches are easy to + # code: + # + # .. 
code-block:: python + # + # env = ParallelEnv( + # lambda: TransformedEnv(GymEnv("HalfCheetah-v4"), transforms), + # num_workers=4 + # ) + # env = TransformedEnv( + # ParallelEnv(lambda: GymEnv("HalfCheetah-v4"), num_workers=4), + # transforms + # ) + # + # To leverage the vectorization capabilities of PyTorch, we adopt + # the first method: + # + + + def parallel_env_constructor( + transform_state_dict, + ): + if env_per_collector == 1: + + def make_t_env(): + env = make_transformed_env(make_env()) + env.transform[2].init_stats(3) + env.transform[2].loc.copy_(transform_state_dict["loc"]) + env.transform[2].scale.copy_(transform_state_dict["scale"]) + return env + + env_creator = EnvCreator(make_t_env) + return env_creator + + parallel_env = ParallelEnv( + num_workers=env_per_collector, + create_env_fn=EnvCreator(lambda: make_env()), + create_env_kwargs=None, + pin_memory=False, + ) + env = make_transformed_env(parallel_env) + # we call `init_stats` for a limited number of steps, just to instantiate + # the lazy buffers. + env.transform[2].init_stats(3, cat_dim=1, reduce_dim=[0, 1]) + env.transform[2].load_state_dict(transform_state_dict) + return env + + + ############################################################################### + # Building the model + # ------------------ + # + # We now turn to the setup of the model. As we have seen, DDPG requires a + # value network, trained to estimate the value of a state-action pair, and a + # parametric actor that learns how to select actions that maximize this value. + # + # Recall that building a TorchRL module requires two steps: + # + # - writing the :class:`torch.nn.Module` that will be used as network, + # - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the + # data flow is handled by specifying the input and output keys. + # + # In more complex scenarios, :class:`tensordict.nn.TensorDictSequential` can + # also be used. + # + # + # The Q-Value network is wrapped in a :class:`torchrl.modules.ValueOperator` + # that automatically sets the ``out_keys`` to ``"state_action_value`` for q-value + # networks and ``state_value`` for other value networks. + # + # Since we use lazy modules, it is necessary to materialize the lazy modules + # before being able to move the policy from device to device and achieve other + # operations. Hence, it is good practice to run the modules with a small + # sample of data. For this purpose, we generate fake data from the + # environment specs. + # + + + def make_ddpg_actor( + transform_state_dict, + device="cpu", + ): + proof_environment = make_transformed_env(make_env()) + proof_environment.transform[2].init_stats(3) + proof_environment.transform[2].load_state_dict(transform_state_dict) + + env_specs = proof_environment.specs + in_features = env_specs["output_spec"]["observation"]["observation_vector"].shape[ + -1 ] - double_to_float_inv_list += ["action"] - - # We concatenate all states into a single "observation_vector" - # even if there is a single tensor, it'll be renamed in "observation_vector". - # This facilitates the downstream operations as we know the name of the - # output tensor. - # In some environments (not half-cheetah), there may be more than one - # observation vector: in this case this code snippet will concatenate them - # all. 
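    # (As an indication: with the gym backend the only entry is typically
    # "observation", whereas dm_control's cheetah exposes separate "position"
    # and "velocity" entries.)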
- selected_keys = list(env.observation_spec.keys()) - out_key = "observation_vector" - env.append_transform(CatTensors(in_keys=selected_keys, out_key=out_key)) - - # we normalize the states, but for now let's just instantiate a stateless - # version of the transform - env.append_transform(ObservationNorm(in_keys=[out_key], standard_normal=True)) - - double_to_float_list.append(out_key) - env.append_transform( - DoubleToFloat( - in_keys=double_to_float_list, in_keys_inv=double_to_float_inv_list + out_features = env_specs["input_spec"]["action"].shape[-1] + + actor_net = MLP( + in_features=in_features, + out_features=out_features, + num_cells=[num_cells] * num_layers, + activation_class=nn.Tanh, + activate_last_layer=True, # with this option on, we use a Tanh map as a last layer, thereby constraining the action to the [-1; 1] domain + ) + in_keys = ["observation_vector"] + out_keys = ["action"] + + actor = Actor( + actor_net, + in_keys=in_keys, + out_keys=out_keys, + spec=CompositeSpec(action=env_specs["input_spec"]["action"]), + ).to(device) + + q_net = MLP( + in_features=in_features + + out_features, # receives an action and an observation as input + out_features=1, + num_cells=[num_cells] * num_layers, + activation_class=nn.Tanh, ) - ) - - return env + in_keys = in_keys + ["action"] + qnet = ValueOperator( + in_keys=in_keys, + module=q_net, + ).to(device) + + return actor, qnet + + + ############################################################################### + # Evaluator: building your recorder object + # ---------------------------------------- + # + # As the training data is obtained using some exploration strategy, the true + # performance of our algorithm needs to be assessed in deterministic mode. We + # do this using a dedicated class, ``Recorder``, which executes the policy in + # the environment at a given frequency and returns some statistics obtained + # from these simulations. + # + # The following helper function builds this object: + + + def make_recorder(actor_model_explore, transform_state_dict): + base_env = make_env() + recorder = make_transformed_env(base_env) + recorder.transform[2].init_stats(3) + recorder.transform[2].load_state_dict(transform_state_dict) + + recorder_obj = Recorder( + record_frames=1000, + frame_skip=frame_skip, + policy_exploration=actor_model_explore, + recorder=recorder, + exploration_mode="mean", + record_interval=record_interval, + ) + return recorder_obj + + + ############################################################################### + # Replay buffer + # ------------- + # + # Replay buffers come in two flavors: prioritized (where some error signal + # is used to give a higher likelihood of sampling to some items than others) + # and regular, circular experience replay. + # + # TorchRL replay buffers are composable: one can pick up the storage, sampling + # and writing strategies. It is also possible to + # store tensors on physical memory using a memory-mapped array. 
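    # As a minimal, self-contained sketch of this composability (toy sizes,
    # default scratch location for the memory-mapped storage):
    #
    # .. code-block:: python
    #
    #    import torch
    #    from tensordict.tensordict import TensorDict
    #    from torchrl.data import TensorDictReplayBuffer
    #    from torchrl.data.replay_buffers.samplers import RandomSampler
    #    from torchrl.data.replay_buffers.storages import LazyMemmapStorage
    #
    #    rb = TensorDictReplayBuffer(
    #        storage=LazyMemmapStorage(1000),
    #        sampler=RandomSampler(),
    #    )
    #    rb.extend(
    #        TensorDict({"observation_vector": torch.randn(10, 17)}, batch_size=[10])
    #    )
    #    sample = rb.sample(4)  # a TensorDict with batch_size [4]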
The following + # function takes care of creating the replay buffer with the desired + # hyperparameters: + # + + + def make_replay_buffer(buffer_size, prefetch=3): + if prb: + sampler = PrioritizedSampler( + max_capacity=buffer_size, + alpha=0.7, + beta=0.5, + ) + else: + sampler = RandomSampler() + replay_buffer = TensorDictReplayBuffer( + storage=LazyMemmapStorage( + buffer_size, + scratch_dir=buffer_scratch_dir, + device=device, + ), + sampler=sampler, + pin_memory=False, + prefetch=prefetch, + ) + return replay_buffer -############################################################################### -# Normalization of the observations -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# To compute the normalizing statistics, we run an arbitrary number of random -# steps in the environment and compute the mean and standard deviation of the -# collected observations. The :func:`ObservationNorm.init_stats()` method can -# be used for this purpose. To get the summary statistics, we create a dummy -# environment and run it for a given number of steps, collect data over a given -# number of steps and compute its summary statistics. -# + ############################################################################### + # Hyperparameters + # --------------- + # + # After having written our helper functions, it is time to set the + # experiment hyperparameters: -def get_env_stats(): - """Gets the stats of an environment.""" - proof_env = make_transformed_env(make_env()) - proof_env.set_seed(seed) - t = proof_env.transform[2] - t.init_stats(init_env_steps) - transform_state_dict = t.state_dict() - proof_env.close() - return transform_state_dict + ############################################################################### + # Environment + # ~~~~~~~~~~~ + # The backend can be gym or dm_control + backend = "gym" -############################################################################### -# Parallel execution -# ~~~~~~~~~~~~~~~~~~ -# -# The following helper function allows us to run environments in parallel. -# Running environments in parallel can significantly speed up the collection -# throughput. When using transformed environment, we need to choose whether we -# want to execute the transform individually for each environment, or -# centralize the data and transform it in batch. Both approaches are easy to -# code: -# -# .. code-block:: python -# -# env = ParallelEnv( -# lambda: TransformedEnv(GymEnv("HalfCheetah-v4"), transforms), -# num_workers=4 -# ) -# env = TransformedEnv( -# ParallelEnv(lambda: GymEnv("HalfCheetah-v4"), num_workers=4), -# transforms -# ) -# -# To leverage the vectorization capabilities of PyTorch, we adopt -# the first method: -# + exp_name = "cheetah" + # frame_skip batches multiple step together with a single action + # If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to + # be adjusted to have a consistent total number of frames collected across + # experiments. + frame_skip = 2 + from_pixels = False + # Scaling the reward helps us control the signal magnitude for a more + # efficient learning. 
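    # (Recall that ``RewardScaling(loc=0.0, scale=reward_scaling)`` appended in
    # ``make_transformed_env`` rescales the raw reward to roughly
    # ``reward * reward_scaling``.)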
+ reward_scaling = 5.0 -def parallel_env_constructor( - transform_state_dict, -): - if env_per_collector == 1: + # Number of random steps used as for stats computation using ObservationNorm + init_env_steps = 1000 - def make_t_env(): - env = make_transformed_env(make_env()) - env.transform[2].init_stats(3) - env.transform[2].loc.copy_(transform_state_dict["loc"]) - env.transform[2].scale.copy_(transform_state_dict["scale"]) - return env + # Exploration: Number of frames before OU noise becomes null + annealing_frames = 1000000 // frame_skip - env_creator = EnvCreator(make_t_env) - return env_creator + ############################################################################### + # Collection + # ~~~~~~~~~~ - parallel_env = ParallelEnv( - num_workers=env_per_collector, - create_env_fn=EnvCreator(lambda: make_env()), - create_env_kwargs=None, - pin_memory=False, + # We will execute the policy on cuda if available + device = ( + torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") ) - env = make_transformed_env(parallel_env) - # we call `init_stats` for a limited number of steps, just to instantiate - # the lazy buffers. - env.transform[2].init_stats(3, cat_dim=1, reduce_dim=[0, 1]) - env.transform[2].load_state_dict(transform_state_dict) - return env - -############################################################################### -# Building the model -# ------------------ -# -# We now turn to the setup of the model. As we have seen, DDPG requires a -# value network, trained to estimate the value of a state-action pair, and a -# parametric actor that learns how to select actions that maximize this value. -# -# Recall that building a TorchRL module requires two steps: -# -# - writing the :class:`torch.nn.Module` that will be used as network, -# - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the -# data flow is handled by specifying the input and output keys. -# -# In more complex scenarios, :class:`tensordict.nn.TensorDictSequential` can -# also be used. -# -# -# The Q-Value network is wrapped in a :class:`torchrl.modules.ValueOperator` -# that automatically sets the ``out_keys`` to ``"state_action_value`` for q-value -# networks and ``state_value`` for other value networks. -# -# Since we use lazy modules, it is necessary to materialize the lazy modules -# before being able to move the policy from device to device and achieve other -# operations. Hence, it is good practice to run the modules with a small -# sample of data. For this purpose, we generate fake data from the -# environment specs. -# - - -def make_ddpg_actor( - transform_state_dict, - device="cpu", -): - proof_environment = make_transformed_env(make_env()) - proof_environment.transform[2].init_stats(3) - proof_environment.transform[2].load_state_dict(transform_state_dict) - - env_specs = proof_environment.specs - in_features = env_specs["output_spec"]["observation"]["observation_vector"].shape[ - -1 - ] - out_features = env_specs["input_spec"]["action"].shape[-1] - - actor_net = MLP( - in_features=in_features, - out_features=out_features, - num_cells=[num_cells] * num_layers, - activation_class=nn.Tanh, - activate_last_layer=True, # with this option on, we use a Tanh map as a last layer, thereby constraining the action to the [-1; 1] domain + # Number of environments in each data collector + warnings.warn("More envs!") + env_per_collector = 1 + + # Total frames we will use during training. 
Scale up to 500K - 1M for a more + # meaningful training + total_frames = 10000 // frame_skip + + # Number of frames returned by the collector at each iteration of the outer loop. + # We expect batches from the collector to have a shape [env_per_collector, frames_per_batch // env_per_collector] + frames_per_batch = env_per_collector * 1000 // frame_skip + max_frames_per_traj = 1000 // frame_skip + init_random_frames = 0 + # We'll be using the MultiStep class to have a less myopic representation of + # upcoming states + n_steps_forward = 3 + + # record every 10 batch collected + record_interval = 10 + + ############################################################################### + # Optimizer and optimization + # ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + lr = 5e-4 + weight_decay = 0.0 + # UTD: Number of iterations of the inner loop + update_to_data = 32 + batch_size = 128 + + ############################################################################### + # Model + # ~~~~~ + + gamma = 0.99 + tau = 0.005 # Decay factor for the target network + + # Network specs + num_cells = 64 + num_layers = 2 + + ############################################################################### + # Replay buffer + # ~~~~~~~~~~~~~ + + # If True, a Prioritized replay buffer will be used + prb = True + # Number of frames stored in the buffer + traj_len_collector = frames_per_batch // env_per_collector + buffer_size = min(total_frames, 1_000_000 // traj_len_collector) + buffer_scratch_dir = "/tmp/" + + seed = 0 + + ############################################################################### + # Initialization + # -------------- + # + # To initialize the experiment, we first acquire the observation statistics, + # then build the networks, wrap them in an exploration wrapper (following the + # seminal DDPG paper, we used an Ornstein-Uhlenbeck process to add noise to the + # sampled actions). + + + # Seeding + torch.manual_seed(seed) + np.random.seed(seed) + + ############################################################################### + # Normalization stats + # ~~~~~~~~~~~~~~~~~~~ + + transform_state_dict = get_env_stats() + + ############################################################################### + # Models: policy and q-value network + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + actor, qnet = make_ddpg_actor( + transform_state_dict=transform_state_dict, + device=device, ) - in_keys = ["observation_vector"] - out_keys = ["action"] - - actor = Actor( - actor_net, - in_keys=in_keys, - out_keys=out_keys, - spec=CompositeSpec(action=env_specs["input_spec"]["action"]), + if device == torch.device("cpu"): + actor.share_memory() + + + ############################################################################### + # Loss module + # ~~~~~~~~~~~ + # We build our loss module with the actor and qnet we've just created. + # Because we have target parameters to update, we _must_ create a target network + # updater. + # + loss_module = DDPGLoss(actor, qnet) + # let's use the TD(lambda) estimator! 
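    # (The estimator hyperparameters default to ``default_value_kwargs``;
    # thanks to the ``hp.update(hyperparams)`` line in ``make_value_estimator``
    # they could also be overridden here, e.g.
    # ``loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=gamma, lmbda=0.95)``.)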
+ loss_module.make_value_estimator(ValueEstimators.TDLambda) + target_net_updater = SoftUpdate(loss_module, eps=0.98) + target_net_updater.init_() + + ############################################################################### + # The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` + # exploration module: + + actor_model_explore = OrnsteinUhlenbeckProcessWrapper( + actor, + annealing_num_steps=annealing_frames, ).to(device) - - q_net = MLP( - in_features=in_features - + out_features, # receives an action and an observation as input - out_features=1, - num_cells=[num_cells] * num_layers, - activation_class=nn.Tanh, + if device == torch.device("cpu"): + actor_model_explore.share_memory() + + ############################################################################### + # Parallel environment creation + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # We pass the stats computed earlier to normalize the output of our + # environment: + + create_env_fn = parallel_env_constructor( + transform_state_dict=transform_state_dict, ) - in_keys = in_keys + ["action"] - qnet = ValueOperator( - in_keys=in_keys, - module=q_net, - ).to(device) - - return actor, qnet - - -############################################################################### -# Evaluator: building your recorder object -# ---------------------------------------- -# -# As the training data is obtained using some exploration strategy, the true -# performance of our algorithm needs to be assessed in deterministic mode. We -# do this using a dedicated class, ``Recorder``, which executes the policy in -# the environment at a given frequency and returns some statistics obtained -# from these simulations. -# -# The following helper function builds this object: - - -def make_recorder(actor_model_explore, transform_state_dict): - base_env = make_env() - recorder = make_transformed_env(base_env) - recorder.transform[2].init_stats(3) - recorder.transform[2].load_state_dict(transform_state_dict) - - recorder_obj = Recorder( - record_frames=1000, - frame_skip=frame_skip, - policy_exploration=actor_model_explore, - recorder=recorder, - exploration_mode="mean", - record_interval=record_interval, - ) - return recorder_obj - - -############################################################################### -# Replay buffer -# ------------- -# -# Replay buffers come in two flavors: prioritized (where some error signal -# is used to give a higher likelihood of sampling to some items than others) -# and regular, circular experience replay. -# -# TorchRL replay buffers are composable: one can pick up the storage, sampling -# and writing strategies. It is also possible to -# store tensors on physical memory using a memory-mapped array. The following -# function takes care of creating the replay buffer with the desired -# hyperparameters: -# - - -def make_replay_buffer(buffer_size, prefetch=3): - if prb: - sampler = PrioritizedSampler( - max_capacity=buffer_size, - alpha=0.7, - beta=0.5, - ) + ############################################################################### + # Data collector + # ~~~~~~~~~~~~~~ + # + # TorchRL provides specialized classes to help you collect data by executing + # the policy in the environment. These "data collectors" iteratively compute + # the action to be executed at a given time, then execute a step in the + # environment and reset it when required. 
+ # Data collectors are designed to help developers have a tight control + # on the number of frames per batch of data, on the (a)sync nature of this + # collection and on the resources allocated to the data collection (e.g. GPU, + # number of workers etc). + # + # Here we will use + # :class:`torchrl.collectors.MultiaSyncDataCollector`, a data collector that + # will be executed in an async manner (i.e. data will be collected while + # the policy is being optimized). With the :class:`MultiaSyncDataCollector`, + # multiple workers are running rollouts separately. When a batch is asked, it + # is gathered from the first worker that can provide it. + # + # The parameters to specify are: + # + # - the list of environment creation functions, + # - the policy, + # - the total number of frames before the collector is considered empty, + # - the maximum number of frames per trajectory (useful for non-terminating + # environments, like dm_control ones). + # + # One should also pass: + # + # - the number of frames in each batch collected, + # - the number of random steps executed independently from the policy, + # - the devices used for policy execution + # - the devices used to store data before the data is passed to the main + # process. + # + # Collectors also accept post-processing hooks. + # For instance, the :class:`torchrl.data.postprocs.MultiStep` class passed as + # ``postproc`` makes it so that the rewards of the ``n`` upcoming steps are + # summed (with some discount factor) and the next observation is changed to + # be the n-step forward observation. One could pass other transforms too: + # using :class:`tensordict.nn.TensorDictModule` and + # :class:`tensordict.nn.TensorDictSequential` we can seamlessly append a + # wide range of transforms to our collector. + + if n_steps_forward > 0: + multistep = MultiStep(n_steps=n_steps_forward, gamma=gamma) else: - sampler = RandomSampler() - replay_buffer = TensorDictReplayBuffer( - storage=LazyMemmapStorage( - buffer_size, - scratch_dir=buffer_scratch_dir, - device=device, - ), - sampler=sampler, - pin_memory=False, - prefetch=prefetch, + multistep = None + + warnings.warn("Change collector!!") + + collector = MultiaSyncDataCollector( + create_env_fn=[create_env_fn, create_env_fn], + policy=actor_model_explore, + total_frames=total_frames, + max_frames_per_traj=max_frames_per_traj, + frames_per_batch=frames_per_batch, + init_random_frames=init_random_frames, + reset_at_each_iter=False, + postproc=multistep, + split_trajs=True, + device=device, # device for execution + storing_device=device, # device where data will be stored and passed + update_at_each_batch=False, + exploration_mode="random", ) - return replay_buffer + collector.set_seed(seed) -############################################################################### -# Hyperparameters -# --------------- -# -# After having written our helper functions, it is time to set the -# experiment hyperparameters: - -############################################################################### -# Environment -# ~~~~~~~~~~~ - -# The backend can be gym or dm_control -backend = "gym" - -exp_name = "cheetah" - -# frame_skip batches multiple step together with a single action -# If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to -# be adjusted to have a consistent total number of frames collected across -# experiments. -frame_skip = 2 -from_pixels = False -# Scaling the reward helps us control the signal magnitude for a more -# efficient learning. 
-reward_scaling = 5.0 - -# Number of random steps used as for stats computation using ObservationNorm -init_env_steps = 1000 - -# Exploration: Number of frames before OU noise becomes null -annealing_frames = 1000000 // frame_skip - -############################################################################### -# Collection -# ~~~~~~~~~~ - -# We will execute the policy on cuda if available -device = ( - torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") -) - -# Number of environments in each data collector -warnings.warn("More envs!") -env_per_collector = 1 - -# Total frames we will use during training. Scale up to 500K - 1M for a more -# meaningful training -total_frames = 10000 // frame_skip - -# Number of frames returned by the collector at each iteration of the outer loop. -# We expect batches from the collector to have a shape [env_per_collector, frames_per_batch // env_per_collector] -frames_per_batch = env_per_collector * 1000 // frame_skip -max_frames_per_traj = 1000 // frame_skip -init_random_frames = 0 -# We'll be using the MultiStep class to have a less myopic representation of -# upcoming states -n_steps_forward = 3 - -# record every 10 batch collected -record_interval = 10 - -############################################################################### -# Optimizer and optimization -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ - -lr = 5e-4 -weight_decay = 0.0 -# UTD: Number of iterations of the inner loop -update_to_data = 32 -batch_size = 128 - -############################################################################### -# Model -# ~~~~~ - -gamma = 0.99 -tau = 0.005 # Decay factor for the target network - -# Network specs -num_cells = 64 -num_layers = 2 - -############################################################################### -# Replay buffer -# ~~~~~~~~~~~~~ - -# If True, a Prioritized replay buffer will be used -prb = True -# Number of frames stored in the buffer -traj_len_collector = frames_per_batch // env_per_collector -buffer_size = min(total_frames, 1_000_000 // traj_len_collector) -buffer_scratch_dir = "/tmp/" - -seed = 0 - -############################################################################### -# Initialization -# -------------- -# -# To initialize the experiment, we first acquire the observation statistics, -# then build the networks, wrap them in an exploration wrapper (following the -# seminal DDPG paper, we used an Ornstein-Uhlenbeck process to add noise to the -# sampled actions). - - -# Seeding -torch.manual_seed(seed) -np.random.seed(seed) - -############################################################################### -# Normalization stats -# ~~~~~~~~~~~~~~~~~~~ - -transform_state_dict = get_env_stats() - -############################################################################### -# Models: policy and q-value network -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -actor, qnet = make_ddpg_actor( - transform_state_dict=transform_state_dict, - device=device, -) -if device == torch.device("cpu"): - actor.share_memory() - - -############################################################################### -# Loss module -# ~~~~~~~~~~~ -# We build our loss module with the actor and qnet we've just created. -# Because we have target parameters to update, we _must_ create a target network -# updater. -# -loss_module = DDPGLoss(actor, qnet) -# let's use the TD(lambda) estimator! 
-loss_module.make_value_estimator(ValueEstimators.TDLambda) -target_net_updater = SoftUpdate(loss_module, eps=0.98) -target_net_updater.init_() - -############################################################################### -# The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` -# exploration module: - -actor_model_explore = OrnsteinUhlenbeckProcessWrapper( - actor, - annealing_num_steps=annealing_frames, -).to(device) -if device == torch.device("cpu"): - actor_model_explore.share_memory() - -############################################################################### -# Parallel environment creation -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# We pass the stats computed earlier to normalize the output of our -# environment: + ############################################################################### + # Replay buffer + # ~~~~~~~~~~~~~ + # -create_env_fn = parallel_env_constructor( - transform_state_dict=transform_state_dict, -) + replay_buffer = make_replay_buffer(buffer_size, prefetch=3) -############################################################################### -# Data collector -# ~~~~~~~~~~~~~~ -# -# TorchRL provides specialized classes to help you collect data by executing -# the policy in the environment. These "data collectors" iteratively compute -# the action to be executed at a given time, then execute a step in the -# environment and reset it when required. -# Data collectors are designed to help developers have a tight control -# on the number of frames per batch of data, on the (a)sync nature of this -# collection and on the resources allocated to the data collection (e.g. GPU, -# number of workers etc). -# -# Here we will use -# :class:`torchrl.collectors.MultiaSyncDataCollector`, a data collector that -# will be executed in an async manner (i.e. data will be collected while -# the policy is being optimized). With the :class:`MultiaSyncDataCollector`, -# multiple workers are running rollouts separately. When a batch is asked, it -# is gathered from the first worker that can provide it. -# -# The parameters to specify are: -# -# - the list of environment creation functions, -# - the policy, -# - the total number of frames before the collector is considered empty, -# - the maximum number of frames per trajectory (useful for non-terminating -# environments, like dm_control ones). -# -# One should also pass: -# -# - the number of frames in each batch collected, -# - the number of random steps executed independently from the policy, -# - the devices used for policy execution -# - the devices used to store data before the data is passed to the main -# process. -# -# Collectors also accept post-processing hooks. -# For instance, the :class:`torchrl.data.postprocs.MultiStep` class passed as -# ``postproc`` makes it so that the rewards of the ``n`` upcoming steps are -# summed (with some discount factor) and the next observation is changed to -# be the n-step forward observation. One could pass other transforms too: -# using :class:`tensordict.nn.TensorDictModule` and -# :class:`tensordict.nn.TensorDictSequential` we can seamlessly append a -# wide range of transforms to our collector. 
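# As an aside, the iteration pattern is the same for the simpler,
# single-process :class:`torchrl.collectors.SyncDataCollector`. A throwaway
# sketch (it assumes ``Pendulum-v1`` is available; the names ``toy_policy``
# and ``toy_collector`` are ours):
#
# .. code-block:: python
#
#    from tensordict.nn import TensorDictModule
#    from torch import nn
#    from torchrl.collectors import SyncDataCollector
#    from torchrl.envs.libs.gym import GymEnv
#
#    toy_policy = TensorDictModule(
#        nn.Linear(3, 1), in_keys=["observation"], out_keys=["action"]
#    )
#    toy_collector = SyncDataCollector(
#        create_env_fn=lambda: GymEnv("Pendulum-v1"),
#        policy=toy_policy,
#        frames_per_batch=50,
#        total_frames=200,
#    )
#    for batch in toy_collector:
#        print(batch)  # a TensorDict of 50 transitions per iteration
#    toy_collector.shutdown()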
- -if n_steps_forward > 0: - multistep = MultiStep(n_steps=n_steps_forward, gamma=gamma) -else: - multistep = None - -warnings.warn("Change collector!!") - -collector = SyncDataCollector( - create_env_fn=create_env_fn, - policy=actor_model_explore, - total_frames=total_frames, - max_frames_per_traj=max_frames_per_traj, - frames_per_batch=frames_per_batch, - init_random_frames=init_random_frames, - reset_at_each_iter=False, - postproc=multistep, - split_trajs=True, - device=device, # device for execution - storing_device=device, # device where data will be stored and passed - update_at_each_batch=False, - exploration_mode="random", -) - -collector.set_seed(seed) - -############################################################################### -# Replay buffer -# ~~~~~~~~~~~~~ -# + ############################################################################### + # Recorder + # ~~~~~~~~ -replay_buffer = make_replay_buffer(buffer_size, prefetch=3) + recorder = make_recorder(actor_model_explore, transform_state_dict) -############################################################################### -# Recorder -# ~~~~~~~~ + ############################################################################### + # Optimizer + # ~~~~~~~~~ + # + # Finally, we will use the Adam optimizer for the policy and value network, + # with the same learning rate for both. -recorder = make_recorder(actor_model_explore, transform_state_dict) - -############################################################################### -# Optimizer -# ~~~~~~~~~ -# -# Finally, we will use the Adam optimizer for the policy and value network, -# with the same learning rate for both. + optimizer = optim.Adam(loss_module.parameters(), lr=lr, weight_decay=weight_decay) + total_collection_steps = total_frames // frames_per_batch -optimizer = optim.Adam(loss_module.parameters(), lr=lr, weight_decay=weight_decay) -total_collection_steps = total_frames // frames_per_batch - -scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer, T_max=total_collection_steps -) - -############################################################################### -# Time to train the policy -# ------------------------ -# -# The training loop is pretty straightforward now that we have built all the -# modules we need. -# - -rewards = [] -rewards_eval = [] - -# Main loop -norm_factor_training = ( - sum(gamma**i for i in range(n_steps_forward)) if n_steps_forward else 1 -) - -collected_frames = 0 -pbar = tqdm.tqdm(total=total_frames) -r0 = None -for i, tensordict in enumerate(collector): - - # update weights of the inference policy - collector.update_policy_weights_() - - if r0 is None: - r0 = tensordict["next", "reward"].mean().item() - pbar.update(tensordict.numel()) + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=total_collection_steps + ) - # extend the replay buffer with the new data - current_frames = tensordict.numel() - collected_frames += current_frames - replay_buffer.extend(tensordict.cpu()) + ############################################################################### + # Time to train the policy + # ------------------------ + # + # The training loop is pretty straightforward now that we have built all the + # modules we need. 
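    # The loop below has two levels: an outer iteration over the batches
    # yielded by the collector and, for each batch, ``update_to_data`` inner
    # optimization steps on samples drawn from the replay buffer.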
+ # - # optimization steps - if collected_frames >= init_random_frames: - for _ in range(update_to_data): - # sample from replay buffer - sampled_tensordict = replay_buffer.sample(batch_size).clone() + rewards = [] + rewards_eval = [] - # Compute loss - loss_dict = loss_module(sampled_tensordict) + # Main loop + norm_factor_training = ( + sum(gamma**i for i in range(n_steps_forward)) if n_steps_forward else 1 + ) - # optimize - loss_val = sum( - value for key, value in loss_dict.items() if key.startswith("loss") + collected_frames = 0 + pbar = tqdm.tqdm(total=total_frames) + r0 = None + for i, tensordict in enumerate(collector): + + # update weights of the inference policy + collector.update_policy_weights_() + + if r0 is None: + r0 = tensordict["next", "reward"].mean().item() + pbar.update(tensordict.numel()) + + # extend the replay buffer with the new data + current_frames = tensordict.numel() + collected_frames += current_frames + replay_buffer.extend(tensordict.cpu()) + + # optimization steps + if collected_frames >= init_random_frames: + for _ in range(update_to_data): + # sample from replay buffer + sampled_tensordict = replay_buffer.sample(batch_size).clone() + + # Compute loss + loss_dict = loss_module(sampled_tensordict) + + # optimize + loss_val = sum( + value for key, value in loss_dict.items() if key.startswith("loss") + ) + loss_val.backward() + optimizer.step() + optimizer.zero_grad() + + # update priority + if prb: + replay_buffer.update_tensordict_priority(sampled_tensordict) + # update target network + target_net_updater.step() + + rewards.append( + ( + i, + tensordict["next", "reward"].mean().item() + / norm_factor_training + / frame_skip, ) - loss_val.backward() - optimizer.step() - optimizer.zero_grad() - - # update priority - if prb: - replay_buffer.update_tensordict_priority(sampled_tensordict) - # update target network - target_net_updater.step() - - rewards.append( - ( - i, - tensordict["next", "reward"].mean().item() - / norm_factor_training - / frame_skip, ) - ) - td_record = recorder(None) - if td_record is not None: - rewards_eval.append((i, td_record["r_evaluation"].item())) - if len(rewards_eval): - pbar.set_description( - f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}" - ) - - # update the exploration strategy - actor_model_explore.step(current_frames) - if collected_frames >= init_random_frames: - scheduler.step() - -collector.shutdown() -del collector + td_record = recorder(None) + if td_record is not None: + rewards_eval.append((i, td_record["r_evaluation"].item())) + if len(rewards_eval): + pbar.set_description( + f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}" + ) -############################################################################### -# Experiment results -# ------------------ -# -# We make a simple plot of the average rewards during training. We can observe -# that our policy learned quite well to solve the task. -# -# **Note**: As already mentioned above, to get a more reasonable performance, -# use a greater value for ``total_frames`` e.g. 1M. 
- -plt.figure() -plt.plot(*zip(*rewards), label="training") -plt.plot(*zip(*rewards_eval), label="eval") -plt.legend() -plt.xlabel("iter") -plt.ylabel("reward") -plt.tight_layout() - -############################################################################### -# Conclusion -# ---------- -# -# In this tutorial, we have learnt how to code a loss module in TorchRL given -# the concrete example of DDPG. -# -# The key takeaways are: -# -# - How to use the :class:`torchrl.objectives.LossModule` class to register components; -# - How to use (or not) a target network, and how to update its parameters; -# - How to create an optimizer associated with a loss module. + # update the exploration strategy + actor_model_explore.step(current_frames) + if collected_frames >= init_random_frames: + scheduler.step() + + collector.shutdown() + del collector + + ############################################################################### + # Experiment results + # ------------------ + # + # We make a simple plot of the average rewards during training. We can observe + # that our policy learned quite well to solve the task. + # + # **Note**: As already mentioned above, to get a more reasonable performance, + # use a greater value for ``total_frames`` e.g. 1M. + + plt.figure() + plt.plot(*zip(*rewards), label="training") + plt.plot(*zip(*rewards_eval), label="eval") + plt.legend() + plt.xlabel("iter") + plt.ylabel("reward") + plt.tight_layout() + + ############################################################################### + # Conclusion + # ---------- + # + # In this tutorial, we have learnt how to code a loss module in TorchRL given + # the concrete example of DDPG. + # + # The key takeaways are: + # + # - How to use the :class:`torchrl.objectives.LossModule` class to register components; + # - How to use (or not) a target network, and how to update its parameters; + # - How to create an optimizer associated with a loss module. # From aca694621c0751888b51abcc7f3df1d9873f82da Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:51:16 +0100 Subject: [PATCH 52/89] tmp --- tutorials/sphinx-tutorials/coding_ddpg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 81542a4c1d0..362f3b4467a 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -818,8 +818,7 @@ def make_replay_buffer(buffer_size, prefetch=3): ) # Number of environments in each data collector - warnings.warn("More envs!") - env_per_collector = 1 + env_per_collector = 2 # Total frames we will use during training. 
Scale up to 500K - 1M for a more # meaningful training From 206830ab86c52bb392ceda4168934f9d39f8e301 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:51:25 +0100 Subject: [PATCH 53/89] tmp --- tutorials/sphinx-tutorials/coding_ddpg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 362f3b4467a..94e5acceacd 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -987,8 +987,6 @@ def make_replay_buffer(buffer_size, prefetch=3): else: multistep = None - warnings.warn("Change collector!!") - collector = MultiaSyncDataCollector( create_env_fn=[create_env_fn, create_env_fn], policy=actor_model_explore, From 3b4e0e786df226043cad35b2c3b109f8492ba4d2 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 14:55:02 +0100 Subject: [PATCH 54/89] tmp --- tutorials/sphinx-tutorials/coding_ddpg.py | 2054 +++++++++++---------- 1 file changed, 1030 insertions(+), 1024 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 94e5acceacd..dab9d740216 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -60,1086 +60,1092 @@ warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore -if __name__ == "__main__": - import numpy as np - import torch.cuda - import tqdm - from matplotlib import pyplot as plt - from tensordict.nn import TensorDictModule - from tensordict.tensordict import TensorDict, TensorDictBase - from torch import nn, optim - from torchrl.collectors import MultiaSyncDataCollector, SyncDataCollector - from torchrl.data import CompositeSpec, TensorDictReplayBuffer - from torchrl.data.postprocs import MultiStep - from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler - from torchrl.data.replay_buffers.storages import LazyMemmapStorage - from torchrl.envs import ( - CatTensors, - DoubleToFloat, - EnvCreator, - ObservationNorm, - ParallelEnv, +import numpy as np +import torch.cuda +import tqdm +from matplotlib import pyplot as plt +from tensordict.nn import TensorDictModule +from tensordict.tensordict import TensorDict, TensorDictBase +from torch import nn, optim +from torchrl.collectors import MultiaSyncDataCollector, SyncDataCollector +from torchrl.data import CompositeSpec, TensorDictReplayBuffer +from torchrl.data.postprocs import MultiStep +from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler +from torchrl.data.replay_buffers.storages import LazyMemmapStorage +from torchrl.envs import ( + CatTensors, + DoubleToFloat, + EnvCreator, + ObservationNorm, + ParallelEnv, +) +from torchrl.envs.libs.dm_control import DMControlEnv +from torchrl.envs.libs.gym import GymEnv +from torchrl.envs.transforms import RewardScaling, TransformedEnv +from torchrl.envs.utils import set_exploration_mode +from torchrl.modules import ( + Actor, + ActorCriticWrapper, + MLP, + OrnsteinUhlenbeckProcessWrapper, + ValueOperator, +) +from torchrl.objectives.utils import distance_loss, SoftUpdate +from torchrl.trainers import Recorder + +############################################################################### +# torchrl :class:`torchrl.objectives.LossModule` +# ---------------------------------------------- +# +# TorchRL provides a series of losses to use in your training scripts. +# The aim is to have losses that are easily reusable/swappable and that have +# a simple signature. 
+# +# The main characteristics of TorchRL losses are: +# +# - they are stateful objects: they contain a copy of the trainable parameters +# such that ``loss_module.parameters()`` gives whatever is needed to train the +# algorithm. +# - They follow the ``tensordict`` convention: the :meth:`torch.nn.Module.forward` +# method will receive a tensordict as input that contains all the necessary +# information to return a loss value. +# +# >>> data = replay_buffer.sample() +# >>> loss_dict = loss_module(data) +# +# - They output a :class:`tensordict.TensorDict` instance with the loss values +# written under a ``"loss_"`` where ``smth`` is a string describing the +# loss. Additional keys in the tensordict may be useful metrics to log during +# training time. +# .. note:: +# The reason we return independent losses is to let the user use a different +# optimizer for different sets of parameters for instance. Summing the losses +# can be simply done via +# +# >>> loss_val = sum(loss for key, loss in loss_dict.items() if key.startswith("loss_")) +# +# The ``__init__`` method +# ~~~~~~~~~~~~~~~~~~~~~~~ +# +# The parent class of all losses is :class:`torchrl.objectives.LossModule`. +# As many other components of the library, its :meth:`torchrl.objectives.LossModule.forward` method expects +# as input a :class:`tensordict.TensorDict` instance sampled from an experience +# replay buffer, or any similar data structure. Using this format makes it +# possible to re-use the module across +# modalities, or in complex settings where the model needs to read multiple +# entries for instance. In other words, it allows us to code a loss module that +# is oblivious to the data type that is being given to is and that focuses on +# running the elementary steps of the loss function and only those. +# +# To keep the tutorial as didactic as we can, we'll be displaying each method +# of the class independently and we'll be populating the class at a later +# stage. +# +# Let us start with the :meth:`torchrl.objectives.LossModule.__init__` +# method. DDPG aims at solving a control task with a simple strategy: +# training a policy to output actions that maximise the value predicted by +# a value network. Hence, our loss module needs to receive two networks in its +# constructor: an actor and a value networks. We expect both of these to be +# tensordict-compatible objects, such as +# :class:`tensordict.nn.TensorDictModule`. +# Our loss function will need to compute a target value and fit the value +# network to this, and generate an action and fit the policy such that its +# value estimate is maximised. +# +# The crucial step of the :meth:`LossModule.__init__` method is the call to +# :meth:`torchrl.LossModule.convert_to_functional`. This method will extract +# the parameters from the module and convert it to a functional module. +# Strictly speaking, this is not necessary and one may perfectly code all +# the losses without it. However, we encourage its usage for the following +# reason. +# +# The reason TorchRL does this is that RL algorithms often execute the same +# model with different sets of parameters, called "trainable" and "target" +# parameters. +# The "trainable" parameters are those that the optimizer needs to fit. The +# "target" parameters are usually a copy of the formers with some time lag +# (absolute or diluted through a moving average). +# These target parameters are used to compute the value associated with the +# next observation. 
One the advantages of using a set of target parameters +# for the value model that do not match exactly the current configuration is +# that they provide a pessimistic bound on the value function being computed. +# Pay attention to the ``create_target_params`` keyword argument below: this +# argument tells the :meth:`torchrl.objectives.LossModule.convert_to_functional` +# method to create a set of target parameters in the loss module to be used +# for target value computation. If this is set to ``False`` (see the actor network +# for instance) the ``target_actor_network_params`` attribute will still be +# accessible but this will just return a **detached** version of the +# actor parameters. +# +# Later, we will see how the target parameters should be updated in torchrl. +# + + +def _init( + self, + actor_network: TensorDictModule, + value_network: TensorDictModule, +) -> None: + super(type(self), self).__init__() + + self.convert_to_functional( + actor_network, + "actor_network", + create_target_params=False, ) - from torchrl.envs.libs.dm_control import DMControlEnv - from torchrl.envs.libs.gym import GymEnv - from torchrl.envs.transforms import RewardScaling, TransformedEnv - from torchrl.envs.utils import set_exploration_mode - from torchrl.modules import ( - Actor, - ActorCriticWrapper, - MLP, - OrnsteinUhlenbeckProcessWrapper, - ValueOperator, + self.convert_to_functional( + value_network, + "value_network", + create_target_params=True, + compare_against=list(actor_network.parameters()), ) - from torchrl.objectives.utils import distance_loss, SoftUpdate - from torchrl.trainers import Recorder - - ############################################################################### - # torchrl :class:`torchrl.objectives.LossModule` - # ---------------------------------------------- - # - # TorchRL provides a series of losses to use in your training scripts. - # The aim is to have losses that are easily reusable/swappable and that have - # a simple signature. - # - # The main characteristics of TorchRL losses are: - # - # - they are stateful objects: they contain a copy of the trainable parameters - # such that ``loss_module.parameters()`` gives whatever is needed to train the - # algorithm. - # - They follow the ``tensordict`` convention: the :meth:`torch.nn.Module.forward` - # method will receive a tensordict as input that contains all the necessary - # information to return a loss value. - # - # >>> data = replay_buffer.sample() - # >>> loss_dict = loss_module(data) - # - # - They output a :class:`tensordict.TensorDict` instance with the loss values - # written under a ``"loss_"`` where ``smth`` is a string describing the - # loss. Additional keys in the tensordict may be useful metrics to log during - # training time. - # .. note:: - # The reason we return independent losses is to let the user use a different - # optimizer for different sets of parameters for instance. Summing the losses - # can be simply done via - # - # >>> loss_val = sum(loss for key, loss in loss_dict.items() if key.startswith("loss_")) - # - # The ``__init__`` method - # ~~~~~~~~~~~~~~~~~~~~~~~ - # - # The parent class of all losses is :class:`torchrl.objectives.LossModule`. - # As many other components of the library, its :meth:`torchrl.objectives.LossModule.forward` method expects - # as input a :class:`tensordict.TensorDict` instance sampled from an experience - # replay buffer, or any similar data structure. 
Using this format makes it - # possible to re-use the module across - # modalities, or in complex settings where the model needs to read multiple - # entries for instance. In other words, it allows us to code a loss module that - # is oblivious to the data type that is being given to is and that focuses on - # running the elementary steps of the loss function and only those. - # - # To keep the tutorial as didactic as we can, we'll be displaying each method - # of the class independently and we'll be populating the class at a later - # stage. - # - # Let us start with the :meth:`torchrl.objectives.LossModule.__init__` - # method. DDPG aims at solving a control task with a simple strategy: - # training a policy to output actions that maximise the value predicted by - # a value network. Hence, our loss module needs to receive two networks in its - # constructor: an actor and a value networks. We expect both of these to be - # tensordict-compatible objects, such as - # :class:`tensordict.nn.TensorDictModule`. - # Our loss function will need to compute a target value and fit the value - # network to this, and generate an action and fit the policy such that its - # value estimate is maximised. - # - # The crucial step of the :meth:`LossModule.__init__` method is the call to - # :meth:`torchrl.LossModule.convert_to_functional`. This method will extract - # the parameters from the module and convert it to a functional module. - # Strictly speaking, this is not necessary and one may perfectly code all - # the losses without it. However, we encourage its usage for the following - # reason. - # - # The reason TorchRL does this is that RL algorithms often execute the same - # model with different sets of parameters, called "trainable" and "target" - # parameters. - # The "trainable" parameters are those that the optimizer needs to fit. The - # "target" parameters are usually a copy of the formers with some time lag - # (absolute or diluted through a moving average). - # These target parameters are used to compute the value associated with the - # next observation. One the advantages of using a set of target parameters - # for the value model that do not match exactly the current configuration is - # that they provide a pessimistic bound on the value function being computed. - # Pay attention to the ``create_target_params`` keyword argument below: this - # argument tells the :meth:`torchrl.objectives.LossModule.convert_to_functional` - # method to create a set of target parameters in the loss module to be used - # for target value computation. If this is set to ``False`` (see the actor network - # for instance) the ``target_actor_network_params`` attribute will still be - # accessible but this will just return a **detached** version of the - # actor parameters. - # - # Later, we will see how the target parameters should be updated in torchrl. - # - - - def _init( - self, - actor_network: TensorDictModule, - value_network: TensorDictModule, - ) -> None: - super(type(self), self).__init__() - - self.convert_to_functional( - actor_network, - "actor_network", - create_target_params=False, + + self.actor_in_keys = actor_network.in_keys + + # Since the value we'll be using is based on the actor and value network, + # we put them together in a single actor-critic container. 
+ actor_critic = ActorCriticWrapper(actor_network, value_network) + self.actor_critic = actor_critic + self.loss_funtion = "l2" + + +############################################################################### +# The value estimator loss method +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# In many RL algorithm, the value network (or Q-value network) is trained based +# on an empirical value estimate. This can be bootstrapped (TD(0), low +# variance, high bias), meaning +# that the target value is obtained using the next reward and nothing else, or +# a Monte-Carlo estimate can be obtained (TD(1)) in which case the whole +# sequence of upcoming rewards will be used (high variance, low bias). An +# intermediate estimator (TD(:math:`\lambda`)) can also be used to compromise +# bias and variance. +# TorchRL makes it easy to use one or the other estimator via the +# :class:`torchrl.objectives.utils.ValueEstimators` Enum class, which contains +# pointers to all the value estimators implemented. Let us define the default +# value function here. We will take the simplest version (TD(0)), and show later +# on how this can be changed. + +from torchrl.objectives.utils import ValueEstimators + +default_value_estimator = ValueEstimators.TD0 + +############################################################################### +# We also need to give some instructions to DDPG on how to build the value +# estimator, depending on the user query. Depending on the estimator provided, +# we will build the corresponding module to be used at train time: + +from torchrl.objectives.utils import default_value_kwargs +from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator + + +def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): + hp = dict(default_value_kwargs(value_type)) + if hasattr(self, "gamma"): + hp["gamma"] = self.gamma + hp.update(hyperparams) + value_key = "state_action_value" + if value_type == ValueEstimators.TD1: + self._value_estimator = TD1Estimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueEstimators.TD0: + self._value_estimator = TD0Estimator( + value_network=self.actor_critic, value_key=value_key, **hp + ) + elif value_type == ValueEstimators.GAE: + raise NotImplementedError( + f"Value type {value_type} it not implemented for loss {type(self)}." ) - self.convert_to_functional( - value_network, - "value_network", - create_target_params=True, - compare_against=list(actor_network.parameters()), + elif value_type == ValueEstimators.TDLambda: + self._value_estimator = TDLambdaEstimator( + value_network=self.actor_critic, value_key=value_key, **hp ) + else: + raise NotImplementedError(f"Unknown value type {value_type}") - self.actor_in_keys = actor_network.in_keys - - # Since the value we'll be using is based on the actor and value network, - # we put them together in a single actor-critic container. - actor_critic = ActorCriticWrapper(actor_network, value_network) - self.actor_critic = actor_critic - self.loss_funtion = "l2" - - - ############################################################################### - # The value estimator loss method - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # In many RL algorithm, the value network (or Q-value network) is trained based - # on an empirical value estimate. 
This can be bootstrapped (TD(0), low - # variance, high bias), meaning - # that the target value is obtained using the next reward and nothing else, or - # a Monte-Carlo estimate can be obtained (TD(1)) in which case the whole - # sequence of upcoming rewards will be used (high variance, low bias). An - # intermediate estimator (TD(:math:`\lambda`)) can also be used to compromise - # bias and variance. - # TorchRL makes it easy to use one or the other estimator via the - # :class:`torchrl.objectives.utils.ValueEstimators` Enum class, which contains - # pointers to all the value estimators implemented. Let us define the default - # value function here. We will take the simplest version (TD(0)), and show later - # on how this can be changed. - - from torchrl.objectives.utils import ValueEstimators - - default_value_estimator = ValueEstimators.TD0 - - ############################################################################### - # We also need to give some instructions to DDPG on how to build the value - # estimator, depending on the user query. Depending on the estimator provided, - # we will build the corresponding module to be used at train time: - - from torchrl.objectives.utils import default_value_kwargs - from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator - - - def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): - hp = dict(default_value_kwargs(value_type)) - if hasattr(self, "gamma"): - hp["gamma"] = self.gamma - hp.update(hyperparams) - value_key = "state_action_value" - if value_type == ValueEstimators.TD1: - self._value_estimator = TD1Estimator( - value_network=self.actor_critic, value_key=value_key, **hp - ) - elif value_type == ValueEstimators.TD0: - self._value_estimator = TD0Estimator( - value_network=self.actor_critic, value_key=value_key, **hp - ) - elif value_type == ValueEstimators.GAE: - raise NotImplementedError( - f"Value type {value_type} it not implemented for loss {type(self)}." - ) - elif value_type == ValueEstimators.TDLambda: - self._value_estimator = TDLambdaEstimator( - value_network=self.actor_critic, value_key=value_key, **hp - ) - else: - raise NotImplementedError(f"Unknown value type {value_type}") - - - ############################################################################### - # The ``make_value_estimator`` method can but does not need to be called: if - # not, the :class:`torchrl.objectives.LossModule` will query this method with - # its default estimator. - # - # The actor loss method - # ~~~~~~~~~~~~~~~~~~~~~ - # - # The central piece of an RL algorithm is the training loss for the actor. - # In the case of DDPG, this function is quite simple: we just need to compute - # the value associated with an action computed using the policy and optimize - # the actor weights to maximise this value. - # - # When computing this value, we must make sure to take the value parameters out - # of the graph, otherwise the actor and value loss will be mixed up. - # For this, the :func:`torchrl.objectives.utils.hold_out_params` function - # can be used. 
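###############################################################################
# To make this concrete, here is a minimal, self-contained illustration of the
# gradient flow we are after (toy ``nn.Linear`` modules invented for the
# example, not part of the DDPG model): the actor loss must populate gradients
# for the policy parameters only, even though the value network participates
# in the forward pass. Holding the value parameters out -- which is what the
# torchrl helper does for us in a context-manager fashion -- amounts to this:

toy_actor = nn.Linear(3, 2)  # toy policy: observation -> action
toy_value = nn.Linear(5, 1)  # toy Q-network: (observation, action) -> value

toy_obs = torch.randn(4, 3)
toy_action = toy_actor(toy_obs)
# freeze the value parameters for this computation only
for p in toy_value.parameters():
    p.requires_grad_(False)
toy_q = toy_value(torch.cat([toy_obs, toy_action], dim=-1))
(-toy_q.mean()).backward()
for p in toy_value.parameters():
    p.requires_grad_(True)

# the actor has gradients, the value network does not
print([p.grad is not None for p in toy_actor.parameters()])  # [True, True]
print([p.grad is not None for p in toy_value.parameters()])  # [False, False]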
- - from torchrl.objectives.utils import hold_out_params - - - def _loss_actor( - self, - tensordict, - ) -> torch.Tensor: - td_copy = tensordict.select(*self.actor_in_keys).detach() - # Get an action from the actor network - td_copy = self.actor_network( + +############################################################################### +# The ``make_value_estimator`` method can but does not need to be called: if +# not, the :class:`torchrl.objectives.LossModule` will query this method with +# its default estimator. +# +# The actor loss method +# ~~~~~~~~~~~~~~~~~~~~~ +# +# The central piece of an RL algorithm is the training loss for the actor. +# In the case of DDPG, this function is quite simple: we just need to compute +# the value associated with an action computed using the policy and optimize +# the actor weights to maximise this value. +# +# When computing this value, we must make sure to take the value parameters out +# of the graph, otherwise the actor and value loss will be mixed up. +# For this, the :func:`torchrl.objectives.utils.hold_out_params` function +# can be used. + +from torchrl.objectives.utils import hold_out_params + + +def _loss_actor( + self, + tensordict, +) -> torch.Tensor: + td_copy = tensordict.select(*self.actor_in_keys).detach() + # Get an action from the actor network + td_copy = self.actor_network( + td_copy, + params=self.actor_network_params, + ) + # get the value associated with that action + with hold_out_params(self.value_network_params) as params: + td_copy = self.value_network( td_copy, - params=self.actor_network_params, + params=params, ) - # get the value associated with that action - with hold_out_params(self.value_network_params) as params: - td_copy = self.value_network( - td_copy, - params=params, - ) - return -td_copy.get("state_action_value") - - - ############################################################################### - # The value loss method - # ~~~~~~~~~~~~~~~~~~~~~ - # - # We now need to optimize our value network parameters. - # To do this, we will rely on the value estimator of our class: - # - - - def _loss_value( - self, - tensordict, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - td_copy = tensordict.detach() - - # V(s, a) - self.value_network(td_copy, params=self.value_network_params) - pred_val = td_copy.get("state_action_value").squeeze(-1) - - # we manually reconstruct the parameters of the actor-critic, where the first - # set of parameters belongs to the actor and the second to the value function. - target_params = TensorDict( - { - "module": { - "0": self.target_actor_network_params, - "1": self.target_value_network_params, - } - }, - batch_size=self.target_actor_network_params.batch_size, - device=self.target_actor_network_params.device, - ) - with set_exploration_mode("mode"): # we make sure that no exploration is performed - target_value = self.value_estimator.value_estimate( - tensordict, target_params=target_params - ).squeeze(-1) + return -td_copy.get("state_action_value") - # td_error = pred_val - target_value - loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) - td_error = (pred_val - target_value).pow(2) - return loss_value, td_error, pred_val, target_value +############################################################################### +# The value loss method +# ~~~~~~~~~~~~~~~~~~~~~ +# +# We now need to optimize our value network parameters. 
+# To do this, we will rely on the value estimator of our class: +# - ############################################################################### - # Putting things together in a forward call - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # The only missing piece is the forward method, which will glue together the - # value and actor loss, collect the cost values and write them in a tensordict - # delivered to the user. +def _loss_value( + self, + tensordict, +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + td_copy = tensordict.detach() + + # V(s, a) + self.value_network(td_copy, params=self.value_network_params) + pred_val = td_copy.get("state_action_value").squeeze(-1) + + # we manually reconstruct the parameters of the actor-critic, where the first + # set of parameters belongs to the actor and the second to the value function. + target_params = TensorDict( + { + "module": { + "0": self.target_actor_network_params, + "1": self.target_value_network_params, + } + }, + batch_size=self.target_actor_network_params.batch_size, + device=self.target_actor_network_params.device, + ) + with set_exploration_mode("mode"): # we make sure that no exploration is performed + target_value = self.value_estimator.value_estimate( + tensordict, target_params=target_params + ).squeeze(-1) + # td_error = pred_val - target_value + loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) + td_error = (pred_val - target_value).pow(2) - def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: - if not input_tensordict.device == self.device: - raise RuntimeError( - f"Got device={input_tensordict.device} but " - f"actor_network.device={self.device} (self.device={self.device})" - ) + return loss_value, td_error, pred_val, target_value - loss_value, td_error, pred_val, target_value = self.loss_value( - input_tensordict, - ) - td_error = td_error.detach() - td_error = td_error.unsqueeze(input_tensordict.ndimension()) - if input_tensordict.device is not None: - td_error = td_error.to(input_tensordict.device) - input_tensordict.set( - "td_error", - td_error, - inplace=True, - ) - loss_actor = self.loss_actor(input_tensordict) - return TensorDict( - source={ - "loss_actor": loss_actor.mean(), - "loss_value": loss_value.mean(), - "pred_value": pred_val.mean().detach(), - "target_value": target_value.mean().detach(), - "pred_value_max": pred_val.max().detach(), - "target_value_max": target_value.max().detach(), - }, - batch_size=[], - ) +############################################################################### +# Putting things together in a forward call +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# The only missing piece is the forward method, which will glue together the +# value and actor loss, collect the cost values and write them in a tensordict +# delivered to the user. - class DDPGLoss(LossModule): - default_value_estimator = default_value_estimator - make_value_estimator = make_value_estimator - - __init__ = _init - forward = _forward - loss_value = _loss_value - loss_actor = _loss_actor - - - ############################################################################### - # Now that we have our loss, we can use it to train a policy to solve a - # control task. - # - # Environment - # ----------- - # - # In most algorithms, the first thing that needs to be taken care of is the - # construction of the environment as it conditions the remainder of the - # training script. 
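#
# Before settling on a task, it can be useful to look at what a TorchRL
# environment returns in practice. The snippet below is only a sketch (it
# assumes a working gym installation and uses the small ``Pendulum-v1`` task
# purely for illustration): resetting the environment and taking a random step
# yields a tensordict that carries the observation and action together with
# the reward and done flags produced by the step.
#
# .. code-block:: python
#
#    env = GymEnv("Pendulum-v1")
#    tensordict = env.reset()
#    tensordict = env.rand_step(tensordict)
#    print(tensordict)
#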
- # - # For this example, we will be using the ``"cheetah"`` task. The goal is to make - # a half-cheetah run as fast as possible. - # - # In TorchRL, one can create such a task by relying on dm_control or gym: - # - # .. code-block:: python - # - # env = GymEnv("HalfCheetah-v4") - # - # or - # - # .. code-block:: python - # - # env = DMControlEnv("cheetah", "run") - # - # By default, these environment disable rendering. Training from states is - # usually easier than training from images. To keep things simple, we focus - # on learning from states only. To pass the pixels to the tensordicts that - # are collected by :func:`env.step()`, simply pass the ``from_pixels=True`` - # argument to the constructor: - # - # .. code-block:: python - # - # env = GymEnv("HalfCheetah-v4", from_pixels=True, pixels_only=True) - # - # We write a :func:`make_env` helper function that will create an environment - # with either one of the two backends considered above (dm-control or gym). - # - - env_library = None - env_name = None - - - def make_env(): - """Create a base env.""" - global env_library - global env_name - - if backend == "dm_control": - env_name = "cheetah" - env_task = "run" - env_args = (env_name, env_task) - env_library = DMControlEnv - elif backend == "gym": - env_name = "HalfCheetah-v4" - env_args = (env_name,) - env_library = GymEnv - else: - raise NotImplementedError - - env_kwargs = { - "device": device, - "frame_skip": frame_skip, - "from_pixels": from_pixels, - "pixels_only": from_pixels, - } - env = env_library(*env_args, **env_kwargs) - return env - - - ############################################################################### - # Transforms - # ~~~~~~~~~~ - # - # Now that we have a base environment, we may want to modify its representation - # to make it more policy-friendly. In TorchRL, transforms are appended to the - # base environment in a specialized :class:`torchr.envs.TransformedEnv` class. - # - # - It is common in DDPG to rescale the reward using some heuristic value. We - # will multiply the reward by 5 in this example. - # - # - If we are using :mod:`dm_control`, it is also important to build an interface - # between the simulator which works with double precision numbers, and our - # script which presumably uses single precision ones. This transformation goes - # both ways: when calling :func:`env.step`, our actions will need to be - # represented in double precision, and the output will need to be transformed - # to single precision. - # The :class:`torchrl.envs.DoubleToFloat` transform does exactly this: the - # ``in_keys`` list refers to the keys that will need to be transformed from - # double to float, while the ``in_keys_inv`` refers to those that need to - # be transformed to double before being passed to the environment. - # - # - We concatenate the state keys together using the :class:`torchrl.envs.CatTensors` - # transform. - # - # - Finally, we also leave the possibility of normalizing the states: we will - # take care of computing the normalizing constants later on. - # - - - def make_transformed_env( - env, - ): - """Apply transforms to the env (such as reward scaling and state normalization).""" - - env = TransformedEnv(env) - - # we append transforms one by one, although we might as well create the - # transformed environment using the `env = TransformedEnv(base_env, transforms)` - # syntax. 
- env.append_transform(RewardScaling(loc=0.0, scale=reward_scaling)) - - double_to_float_list = [] - double_to_float_inv_list = [] - if env_library is DMControlEnv: - # DMControl requires double-precision - double_to_float_list += [ - "reward", - "action", - ] - double_to_float_inv_list += ["action"] - - # We concatenate all states into a single "observation_vector" - # even if there is a single tensor, it'll be renamed in "observation_vector". - # This facilitates the downstream operations as we know the name of the - # output tensor. - # In some environments (not half-cheetah), there may be more than one - # observation vector: in this case this code snippet will concatenate them - # all. - selected_keys = list(env.observation_spec.keys()) - out_key = "observation_vector" - env.append_transform(CatTensors(in_keys=selected_keys, out_key=out_key)) - - # we normalize the states, but for now let's just instantiate a stateless - # version of the transform - env.append_transform(ObservationNorm(in_keys=[out_key], standard_normal=True)) - - double_to_float_list.append(out_key) - env.append_transform( - DoubleToFloat( - in_keys=double_to_float_list, in_keys_inv=double_to_float_inv_list - ) - ) - return env - - - ############################################################################### - # Normalization of the observations - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # To compute the normalizing statistics, we run an arbitrary number of random - # steps in the environment and compute the mean and standard deviation of the - # collected observations. The :func:`ObservationNorm.init_stats()` method can - # be used for this purpose. To get the summary statistics, we create a dummy - # environment and run it for a given number of steps, collect data over a given - # number of steps and compute its summary statistics. - # - - - def get_env_stats(): - """Gets the stats of an environment.""" - proof_env = make_transformed_env(make_env()) - proof_env.set_seed(seed) - t = proof_env.transform[2] - t.init_stats(init_env_steps) - transform_state_dict = t.state_dict() - proof_env.close() - return transform_state_dict - - - ############################################################################### - # Parallel execution - # ~~~~~~~~~~~~~~~~~~ - # - # The following helper function allows us to run environments in parallel. - # Running environments in parallel can significantly speed up the collection - # throughput. When using transformed environment, we need to choose whether we - # want to execute the transform individually for each environment, or - # centralize the data and transform it in batch. Both approaches are easy to - # code: - # - # .. 
code-block:: python - # - # env = ParallelEnv( - # lambda: TransformedEnv(GymEnv("HalfCheetah-v4"), transforms), - # num_workers=4 - # ) - # env = TransformedEnv( - # ParallelEnv(lambda: GymEnv("HalfCheetah-v4"), num_workers=4), - # transforms - # ) - # - # To leverage the vectorization capabilities of PyTorch, we adopt - # the first method: - # - - - def parallel_env_constructor( - transform_state_dict, - ): - if env_per_collector == 1: - - def make_t_env(): - env = make_transformed_env(make_env()) - env.transform[2].init_stats(3) - env.transform[2].loc.copy_(transform_state_dict["loc"]) - env.transform[2].scale.copy_(transform_state_dict["scale"]) - return env - - env_creator = EnvCreator(make_t_env) - return env_creator - - parallel_env = ParallelEnv( - num_workers=env_per_collector, - create_env_fn=EnvCreator(lambda: make_env()), - create_env_kwargs=None, - pin_memory=False, +def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: + if not input_tensordict.device == self.device: + raise RuntimeError( + f"Got device={input_tensordict.device} but " + f"actor_network.device={self.device} (self.device={self.device})" ) - env = make_transformed_env(parallel_env) - # we call `init_stats` for a limited number of steps, just to instantiate - # the lazy buffers. - env.transform[2].init_stats(3, cat_dim=1, reduce_dim=[0, 1]) - env.transform[2].load_state_dict(transform_state_dict) - return env - - - ############################################################################### - # Building the model - # ------------------ - # - # We now turn to the setup of the model. As we have seen, DDPG requires a - # value network, trained to estimate the value of a state-action pair, and a - # parametric actor that learns how to select actions that maximize this value. - # - # Recall that building a TorchRL module requires two steps: - # - # - writing the :class:`torch.nn.Module` that will be used as network, - # - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the - # data flow is handled by specifying the input and output keys. - # - # In more complex scenarios, :class:`tensordict.nn.TensorDictSequential` can - # also be used. - # - # - # The Q-Value network is wrapped in a :class:`torchrl.modules.ValueOperator` - # that automatically sets the ``out_keys`` to ``"state_action_value`` for q-value - # networks and ``state_value`` for other value networks. - # - # Since we use lazy modules, it is necessary to materialize the lazy modules - # before being able to move the policy from device to device and achieve other - # operations. Hence, it is good practice to run the modules with a small - # sample of data. For this purpose, we generate fake data from the - # environment specs. 
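#
# One possible way of doing this (a sketch only -- the helper calls are
# assumptions, not necessarily the exact code used below) is to reset a proof
# environment, draw a random action from its spec and pass the resulting
# tensordict through both modules once, so that the lazy layers can infer
# their input sizes:
#
# .. code-block:: python
#
#    td = proof_environment.reset()
#    td["action"] = proof_environment.action_spec.rand()
#    actor(td)  # materializes the lazy layers of the policy
#    qnet(td)   # materializes the lazy layers of the q-value network
#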
- # - - - def make_ddpg_actor( - transform_state_dict, - device="cpu", - ): - proof_environment = make_transformed_env(make_env()) - proof_environment.transform[2].init_stats(3) - proof_environment.transform[2].load_state_dict(transform_state_dict) - - env_specs = proof_environment.specs - in_features = env_specs["output_spec"]["observation"]["observation_vector"].shape[ - -1 + + loss_value, td_error, pred_val, target_value = self.loss_value( + input_tensordict, + ) + td_error = td_error.detach() + td_error = td_error.unsqueeze(input_tensordict.ndimension()) + if input_tensordict.device is not None: + td_error = td_error.to(input_tensordict.device) + input_tensordict.set( + "td_error", + td_error, + inplace=True, + ) + loss_actor = self.loss_actor(input_tensordict) + return TensorDict( + source={ + "loss_actor": loss_actor.mean(), + "loss_value": loss_value.mean(), + "pred_value": pred_val.mean().detach(), + "target_value": target_value.mean().detach(), + "pred_value_max": pred_val.max().detach(), + "target_value_max": target_value.max().detach(), + }, + batch_size=[], + ) + + +class DDPGLoss(LossModule): + default_value_estimator = default_value_estimator + make_value_estimator = make_value_estimator + + __init__ = _init + forward = _forward + loss_value = _loss_value + loss_actor = _loss_actor + + +############################################################################### +# Now that we have our loss, we can use it to train a policy to solve a +# control task. +# +# Environment +# ----------- +# +# In most algorithms, the first thing that needs to be taken care of is the +# construction of the environment as it conditions the remainder of the +# training script. +# +# For this example, we will be using the ``"cheetah"`` task. The goal is to make +# a half-cheetah run as fast as possible. +# +# In TorchRL, one can create such a task by relying on dm_control or gym: +# +# .. code-block:: python +# +# env = GymEnv("HalfCheetah-v4") +# +# or +# +# .. code-block:: python +# +# env = DMControlEnv("cheetah", "run") +# +# By default, these environment disable rendering. Training from states is +# usually easier than training from images. To keep things simple, we focus +# on learning from states only. To pass the pixels to the tensordicts that +# are collected by :func:`env.step()`, simply pass the ``from_pixels=True`` +# argument to the constructor: +# +# .. code-block:: python +# +# env = GymEnv("HalfCheetah-v4", from_pixels=True, pixels_only=True) +# +# We write a :func:`make_env` helper function that will create an environment +# with either one of the two backends considered above (dm-control or gym). +# + +env_library = None +env_name = None + + +def make_env(): + """Create a base env.""" + global env_library + global env_name + + if backend == "dm_control": + env_name = "cheetah" + env_task = "run" + env_args = (env_name, env_task) + env_library = DMControlEnv + elif backend == "gym": + env_name = "HalfCheetah-v4" + env_args = (env_name,) + env_library = GymEnv + else: + raise NotImplementedError + + env_kwargs = { + "device": device, + "frame_skip": frame_skip, + "from_pixels": from_pixels, + "pixels_only": from_pixels, + } + env = env_library(*env_args, **env_kwargs) + return env + + +############################################################################### +# Transforms +# ~~~~~~~~~~ +# +# Now that we have a base environment, we may want to modify its representation +# to make it more policy-friendly. 
In TorchRL, transforms are appended to the +# base environment in a specialized :class:`torchr.envs.TransformedEnv` class. +# +# - It is common in DDPG to rescale the reward using some heuristic value. We +# will multiply the reward by 5 in this example. +# +# - If we are using :mod:`dm_control`, it is also important to build an interface +# between the simulator which works with double precision numbers, and our +# script which presumably uses single precision ones. This transformation goes +# both ways: when calling :func:`env.step`, our actions will need to be +# represented in double precision, and the output will need to be transformed +# to single precision. +# The :class:`torchrl.envs.DoubleToFloat` transform does exactly this: the +# ``in_keys`` list refers to the keys that will need to be transformed from +# double to float, while the ``in_keys_inv`` refers to those that need to +# be transformed to double before being passed to the environment. +# +# - We concatenate the state keys together using the :class:`torchrl.envs.CatTensors` +# transform. +# +# - Finally, we also leave the possibility of normalizing the states: we will +# take care of computing the normalizing constants later on. +# + + +def make_transformed_env( + env, +): + """Apply transforms to the env (such as reward scaling and state normalization).""" + + env = TransformedEnv(env) + + # we append transforms one by one, although we might as well create the + # transformed environment using the `env = TransformedEnv(base_env, transforms)` + # syntax. + env.append_transform(RewardScaling(loc=0.0, scale=reward_scaling)) + + double_to_float_list = [] + double_to_float_inv_list = [] + if env_library is DMControlEnv: + # DMControl requires double-precision + double_to_float_list += [ + "reward", + "action", ] - out_features = env_specs["input_spec"]["action"].shape[-1] - - actor_net = MLP( - in_features=in_features, - out_features=out_features, - num_cells=[num_cells] * num_layers, - activation_class=nn.Tanh, - activate_last_layer=True, # with this option on, we use a Tanh map as a last layer, thereby constraining the action to the [-1; 1] domain - ) - in_keys = ["observation_vector"] - out_keys = ["action"] - - actor = Actor( - actor_net, - in_keys=in_keys, - out_keys=out_keys, - spec=CompositeSpec(action=env_specs["input_spec"]["action"]), - ).to(device) - - q_net = MLP( - in_features=in_features - + out_features, # receives an action and an observation as input - out_features=1, - num_cells=[num_cells] * num_layers, - activation_class=nn.Tanh, + double_to_float_inv_list += ["action"] + + # We concatenate all states into a single "observation_vector" + # even if there is a single tensor, it'll be renamed in "observation_vector". + # This facilitates the downstream operations as we know the name of the + # output tensor. + # In some environments (not half-cheetah), there may be more than one + # observation vector: in this case this code snippet will concatenate them + # all. 
+ selected_keys = list(env.observation_spec.keys()) + out_key = "observation_vector" + env.append_transform(CatTensors(in_keys=selected_keys, out_key=out_key)) + + # we normalize the states, but for now let's just instantiate a stateless + # version of the transform + env.append_transform(ObservationNorm(in_keys=[out_key], standard_normal=True)) + + double_to_float_list.append(out_key) + env.append_transform( + DoubleToFloat( + in_keys=double_to_float_list, in_keys_inv=double_to_float_inv_list ) + ) + + return env - in_keys = in_keys + ["action"] - qnet = ValueOperator( - in_keys=in_keys, - module=q_net, - ).to(device) - - return actor, qnet - - - ############################################################################### - # Evaluator: building your recorder object - # ---------------------------------------- - # - # As the training data is obtained using some exploration strategy, the true - # performance of our algorithm needs to be assessed in deterministic mode. We - # do this using a dedicated class, ``Recorder``, which executes the policy in - # the environment at a given frequency and returns some statistics obtained - # from these simulations. - # - # The following helper function builds this object: - - - def make_recorder(actor_model_explore, transform_state_dict): - base_env = make_env() - recorder = make_transformed_env(base_env) - recorder.transform[2].init_stats(3) - recorder.transform[2].load_state_dict(transform_state_dict) - - recorder_obj = Recorder( - record_frames=1000, - frame_skip=frame_skip, - policy_exploration=actor_model_explore, - recorder=recorder, - exploration_mode="mean", - record_interval=record_interval, - ) - return recorder_obj - - - ############################################################################### - # Replay buffer - # ------------- - # - # Replay buffers come in two flavors: prioritized (where some error signal - # is used to give a higher likelihood of sampling to some items than others) - # and regular, circular experience replay. - # - # TorchRL replay buffers are composable: one can pick up the storage, sampling - # and writing strategies. It is also possible to - # store tensors on physical memory using a memory-mapped array. The following - # function takes care of creating the replay buffer with the desired - # hyperparameters: - # - - - def make_replay_buffer(buffer_size, prefetch=3): - if prb: - sampler = PrioritizedSampler( - max_capacity=buffer_size, - alpha=0.7, - beta=0.5, - ) - else: - sampler = RandomSampler() - replay_buffer = TensorDictReplayBuffer( - storage=LazyMemmapStorage( - buffer_size, - scratch_dir=buffer_scratch_dir, - device=device, - ), - sampler=sampler, - pin_memory=False, - prefetch=prefetch, - ) - return replay_buffer +############################################################################### +# Normalization of the observations +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# To compute the normalizing statistics, we run an arbitrary number of random +# steps in the environment and compute the mean and standard deviation of the +# collected observations. The :func:`ObservationNorm.init_stats()` method can +# be used for this purpose. To get the summary statistics, we create a dummy +# environment and run it for a given number of steps, collect data over a given +# number of steps and compute its summary statistics. 
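#
# In other words (a sketch of the effect, assuming ``standard_normal=True`` as
# set above), after ``init_stats`` the transform holds a ``loc`` and a
# ``scale`` that approximate the empirical mean and standard deviation of the
# collected observations, and it rescales incoming observations as:
#
# .. code-block:: python
#
#    obs_norm = (obs - loc) / scale
#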
+# + + +def get_env_stats(): + """Gets the stats of an environment.""" + proof_env = make_transformed_env(make_env()) + proof_env.set_seed(seed) + t = proof_env.transform[2] + t.init_stats(init_env_steps) + transform_state_dict = t.state_dict() + proof_env.close() + return transform_state_dict + + +############################################################################### +# Parallel execution +# ~~~~~~~~~~~~~~~~~~ +# +# The following helper function allows us to run environments in parallel. +# Running environments in parallel can significantly speed up the collection +# throughput. When using transformed environment, we need to choose whether we +# want to execute the transform individually for each environment, or +# centralize the data and transform it in batch. Both approaches are easy to +# code: +# +# .. code-block:: python +# +# env = ParallelEnv( +# lambda: TransformedEnv(GymEnv("HalfCheetah-v4"), transforms), +# num_workers=4 +# ) +# env = TransformedEnv( +# ParallelEnv(lambda: GymEnv("HalfCheetah-v4"), num_workers=4), +# transforms +# ) +# +# To leverage the vectorization capabilities of PyTorch, we adopt +# the first method: +# - ############################################################################### - # Hyperparameters - # --------------- - # - # After having written our helper functions, it is time to set the - # experiment hyperparameters: - ############################################################################### - # Environment - # ~~~~~~~~~~~ +def parallel_env_constructor( + transform_state_dict, +): + if env_per_collector == 1: - # The backend can be gym or dm_control - backend = "gym" + def make_t_env(): + env = make_transformed_env(make_env()) + env.transform[2].init_stats(3) + env.transform[2].loc.copy_(transform_state_dict["loc"]) + env.transform[2].scale.copy_(transform_state_dict["scale"]) + return env - exp_name = "cheetah" + env_creator = EnvCreator(make_t_env) + return env_creator - # frame_skip batches multiple step together with a single action - # If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to - # be adjusted to have a consistent total number of frames collected across - # experiments. - frame_skip = 2 - from_pixels = False - # Scaling the reward helps us control the signal magnitude for a more - # efficient learning. - reward_scaling = 5.0 + parallel_env = ParallelEnv( + num_workers=env_per_collector, + create_env_fn=EnvCreator(lambda: make_env()), + create_env_kwargs=None, + pin_memory=False, + ) + env = make_transformed_env(parallel_env) + # we call `init_stats` for a limited number of steps, just to instantiate + # the lazy buffers. + env.transform[2].init_stats(3, cat_dim=1, reduce_dim=[0, 1]) + env.transform[2].load_state_dict(transform_state_dict) + return env - # Number of random steps used as for stats computation using ObservationNorm - init_env_steps = 1000 - # Exploration: Number of frames before OU noise becomes null - annealing_frames = 1000000 // frame_skip +############################################################################### +# Building the model +# ------------------ +# +# We now turn to the setup of the model. As we have seen, DDPG requires a +# value network, trained to estimate the value of a state-action pair, and a +# parametric actor that learns how to select actions that maximize this value. 
+# +# Recall that building a TorchRL module requires two steps: +# +# - writing the :class:`torch.nn.Module` that will be used as network, +# - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the +# data flow is handled by specifying the input and output keys. +# +# In more complex scenarios, :class:`tensordict.nn.TensorDictSequential` can +# also be used. +# +# +# The Q-Value network is wrapped in a :class:`torchrl.modules.ValueOperator` +# that automatically sets the ``out_keys`` to ``"state_action_value`` for q-value +# networks and ``state_value`` for other value networks. +# +# Since we use lazy modules, it is necessary to materialize the lazy modules +# before being able to move the policy from device to device and achieve other +# operations. Hence, it is good practice to run the modules with a small +# sample of data. For this purpose, we generate fake data from the +# environment specs. +# - ############################################################################### - # Collection - # ~~~~~~~~~~ - # We will execute the policy on cuda if available - device = ( - torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") +def make_ddpg_actor( + transform_state_dict, + device="cpu", +): + proof_environment = make_transformed_env(make_env()) + proof_environment.transform[2].init_stats(3) + proof_environment.transform[2].load_state_dict(transform_state_dict) + + env_specs = proof_environment.specs + in_features = env_specs["output_spec"]["observation"]["observation_vector"].shape[ + -1 + ] + out_features = env_specs["input_spec"]["action"].shape[-1] + + actor_net = MLP( + in_features=in_features, + out_features=out_features, + num_cells=[num_cells] * num_layers, + activation_class=nn.Tanh, + activate_last_layer=True, # with this option on, we use a Tanh map as a last layer, thereby constraining the action to the [-1; 1] domain ) + in_keys = ["observation_vector"] + out_keys = ["action"] + + actor = Actor( + actor_net, + in_keys=in_keys, + out_keys=out_keys, + spec=CompositeSpec(action=env_specs["input_spec"]["action"]), + ).to(device) - # Number of environments in each data collector - env_per_collector = 2 - - # Total frames we will use during training. Scale up to 500K - 1M for a more - # meaningful training - total_frames = 10000 // frame_skip - - # Number of frames returned by the collector at each iteration of the outer loop. 
- # We expect batches from the collector to have a shape [env_per_collector, frames_per_batch // env_per_collector] - frames_per_batch = env_per_collector * 1000 // frame_skip - max_frames_per_traj = 1000 // frame_skip - init_random_frames = 0 - # We'll be using the MultiStep class to have a less myopic representation of - # upcoming states - n_steps_forward = 3 - - # record every 10 batch collected - record_interval = 10 - - ############################################################################### - # Optimizer and optimization - # ~~~~~~~~~~~~~~~~~~~~~~~~~~ - - lr = 5e-4 - weight_decay = 0.0 - # UTD: Number of iterations of the inner loop - update_to_data = 32 - batch_size = 128 - - ############################################################################### - # Model - # ~~~~~ - - gamma = 0.99 - tau = 0.005 # Decay factor for the target network - - # Network specs - num_cells = 64 - num_layers = 2 - - ############################################################################### - # Replay buffer - # ~~~~~~~~~~~~~ - - # If True, a Prioritized replay buffer will be used - prb = True - # Number of frames stored in the buffer - traj_len_collector = frames_per_batch // env_per_collector - buffer_size = min(total_frames, 1_000_000 // traj_len_collector) - buffer_scratch_dir = "/tmp/" - - seed = 0 - - ############################################################################### - # Initialization - # -------------- - # - # To initialize the experiment, we first acquire the observation statistics, - # then build the networks, wrap them in an exploration wrapper (following the - # seminal DDPG paper, we used an Ornstein-Uhlenbeck process to add noise to the - # sampled actions). - - - # Seeding - torch.manual_seed(seed) - np.random.seed(seed) - - ############################################################################### - # Normalization stats - # ~~~~~~~~~~~~~~~~~~~ - - transform_state_dict = get_env_stats() - - ############################################################################### - # Models: policy and q-value network - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - actor, qnet = make_ddpg_actor( - transform_state_dict=transform_state_dict, - device=device, + q_net = MLP( + in_features=in_features + + out_features, # receives an action and an observation as input + out_features=1, + num_cells=[num_cells] * num_layers, + activation_class=nn.Tanh, ) - if device == torch.device("cpu"): - actor.share_memory() - - - ############################################################################### - # Loss module - # ~~~~~~~~~~~ - # We build our loss module with the actor and qnet we've just created. - # Because we have target parameters to update, we _must_ create a target network - # updater. - # - loss_module = DDPGLoss(actor, qnet) - # let's use the TD(lambda) estimator! 
- loss_module.make_value_estimator(ValueEstimators.TDLambda) - target_net_updater = SoftUpdate(loss_module, eps=0.98) - target_net_updater.init_() - - ############################################################################### - # The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` - # exploration module: - - actor_model_explore = OrnsteinUhlenbeckProcessWrapper( - actor, - annealing_num_steps=annealing_frames, + + in_keys = in_keys + ["action"] + qnet = ValueOperator( + in_keys=in_keys, + module=q_net, ).to(device) - if device == torch.device("cpu"): - actor_model_explore.share_memory() - - ############################################################################### - # Parallel environment creation - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # We pass the stats computed earlier to normalize the output of our - # environment: - - create_env_fn = parallel_env_constructor( - transform_state_dict=transform_state_dict, + + return actor, qnet + + +############################################################################### +# Evaluator: building your recorder object +# ---------------------------------------- +# +# As the training data is obtained using some exploration strategy, the true +# performance of our algorithm needs to be assessed in deterministic mode. We +# do this using a dedicated class, ``Recorder``, which executes the policy in +# the environment at a given frequency and returns some statistics obtained +# from these simulations. +# +# The following helper function builds this object: + + +def make_recorder(actor_model_explore, transform_state_dict): + base_env = make_env() + recorder = make_transformed_env(base_env) + recorder.transform[2].init_stats(3) + recorder.transform[2].load_state_dict(transform_state_dict) + + recorder_obj = Recorder( + record_frames=1000, + frame_skip=frame_skip, + policy_exploration=actor_model_explore, + recorder=recorder, + exploration_mode="mean", + record_interval=record_interval, ) + return recorder_obj + - ############################################################################### - # Data collector - # ~~~~~~~~~~~~~~ - # - # TorchRL provides specialized classes to help you collect data by executing - # the policy in the environment. These "data collectors" iteratively compute - # the action to be executed at a given time, then execute a step in the - # environment and reset it when required. - # Data collectors are designed to help developers have a tight control - # on the number of frames per batch of data, on the (a)sync nature of this - # collection and on the resources allocated to the data collection (e.g. GPU, - # number of workers etc). - # - # Here we will use - # :class:`torchrl.collectors.MultiaSyncDataCollector`, a data collector that - # will be executed in an async manner (i.e. data will be collected while - # the policy is being optimized). With the :class:`MultiaSyncDataCollector`, - # multiple workers are running rollouts separately. When a batch is asked, it - # is gathered from the first worker that can provide it. - # - # The parameters to specify are: - # - # - the list of environment creation functions, - # - the policy, - # - the total number of frames before the collector is considered empty, - # - the maximum number of frames per trajectory (useful for non-terminating - # environments, like dm_control ones). 
- # - # One should also pass: - # - # - the number of frames in each batch collected, - # - the number of random steps executed independently from the policy, - # - the devices used for policy execution - # - the devices used to store data before the data is passed to the main - # process. - # - # Collectors also accept post-processing hooks. - # For instance, the :class:`torchrl.data.postprocs.MultiStep` class passed as - # ``postproc`` makes it so that the rewards of the ``n`` upcoming steps are - # summed (with some discount factor) and the next observation is changed to - # be the n-step forward observation. One could pass other transforms too: - # using :class:`tensordict.nn.TensorDictModule` and - # :class:`tensordict.nn.TensorDictSequential` we can seamlessly append a - # wide range of transforms to our collector. - - if n_steps_forward > 0: - multistep = MultiStep(n_steps=n_steps_forward, gamma=gamma) +############################################################################### +# Replay buffer +# ------------- +# +# Replay buffers come in two flavors: prioritized (where some error signal +# is used to give a higher likelihood of sampling to some items than others) +# and regular, circular experience replay. +# +# TorchRL replay buffers are composable: one can pick up the storage, sampling +# and writing strategies. It is also possible to +# store tensors on physical memory using a memory-mapped array. The following +# function takes care of creating the replay buffer with the desired +# hyperparameters: +# + + +def make_replay_buffer(buffer_size, batch_size, prefetch=3): + if prb: + sampler = PrioritizedSampler( + max_capacity=buffer_size, + alpha=0.7, + beta=0.5, + ) else: - multistep = None - - collector = MultiaSyncDataCollector( - create_env_fn=[create_env_fn, create_env_fn], - policy=actor_model_explore, - total_frames=total_frames, - max_frames_per_traj=max_frames_per_traj, - frames_per_batch=frames_per_batch, - init_random_frames=init_random_frames, - reset_at_each_iter=False, - postproc=multistep, - split_trajs=True, - device=device, # device for execution - storing_device=device, # device where data will be stored and passed - update_at_each_batch=False, - exploration_mode="random", + sampler = RandomSampler() + replay_buffer = TensorDictReplayBuffer( + storage=LazyMemmapStorage( + buffer_size, + scratch_dir=buffer_scratch_dir, + device=device, + ), + batch_size=batch_size, + sampler=sampler, + pin_memory=False, + prefetch=prefetch, ) + return replay_buffer - collector.set_seed(seed) - ############################################################################### - # Replay buffer - # ~~~~~~~~~~~~~ - # +############################################################################### +# Hyperparameters +# --------------- +# +# After having written our helper functions, it is time to set the +# experiment hyperparameters: + +############################################################################### +# Environment +# ~~~~~~~~~~~ + +# The backend can be gym or dm_control +backend = "gym" + +exp_name = "cheetah" + +# frame_skip batches multiple step together with a single action +# If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to +# be adjusted to have a consistent total number of frames collected across +# experiments. +frame_skip = 2 +from_pixels = False +# Scaling the reward helps us control the signal magnitude for a more +# efficient learning. 
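+# A :class:`torchrl.envs.transforms.RewardScaling` transform is typically in
+# charge of this rescaling when the environment is built. As a hedged sketch
+# of its effect (assuming the shift ``loc`` is left at zero), every collected
+# reward is simply multiplied by the scaling factor:
+#
+# .. code-block:: python
+#
+#    scaled_reward = reward * reward_scaling  # plus ``loc``, if a shift is used
+#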
+reward_scaling = 5.0 + +# Number of random steps used as for stats computation using ObservationNorm +init_env_steps = 1000 + +# Exploration: Number of frames before OU noise becomes null +annealing_frames = 1000000 // frame_skip + +############################################################################### +# Collection +# ~~~~~~~~~~ + +# We will execute the policy on cuda if available +device = ( + torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") +) + +# Number of environments in each data collector +env_per_collector = 2 + +# Total frames we will use during training. Scale up to 500K - 1M for a more +# meaningful training +total_frames = 10000 // frame_skip + +# Number of frames returned by the collector at each iteration of the outer loop. +# We expect batches from the collector to have a shape [env_per_collector, frames_per_batch // env_per_collector] +frames_per_batch = env_per_collector * 1000 // frame_skip +max_frames_per_traj = 1000 // frame_skip +init_random_frames = 0 +# We'll be using the MultiStep class to have a less myopic representation of +# upcoming states +n_steps_forward = 3 + +# record every 10 batch collected +record_interval = 10 + +############################################################################### +# Optimizer and optimization +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ + +lr = 5e-4 +weight_decay = 0.0 +# UTD: Number of iterations of the inner loop +update_to_data = 32 +batch_size = 128 + +############################################################################### +# Model +# ~~~~~ + +gamma = 0.99 +tau = 0.005 # Decay factor for the target network + +# Network specs +num_cells = 64 +num_layers = 2 + +############################################################################### +# Replay buffer +# ~~~~~~~~~~~~~ + +# If True, a Prioritized replay buffer will be used +prb = True +# Number of frames stored in the buffer +traj_len_collector = frames_per_batch // env_per_collector +buffer_size = min(total_frames, 1_000_000 // traj_len_collector) +buffer_scratch_dir = "/tmp/" + +seed = 0 + +############################################################################### +# Initialization +# -------------- +# +# To initialize the experiment, we first acquire the observation statistics, +# then build the networks, wrap them in an exploration wrapper (following the +# seminal DDPG paper, we used an Ornstein-Uhlenbeck process to add noise to the +# sampled actions). - replay_buffer = make_replay_buffer(buffer_size, prefetch=3) - ############################################################################### - # Recorder - # ~~~~~~~~ +# Seeding +torch.manual_seed(seed) +np.random.seed(seed) - recorder = make_recorder(actor_model_explore, transform_state_dict) +############################################################################### +# Normalization stats +# ~~~~~~~~~~~~~~~~~~~ - ############################################################################### - # Optimizer - # ~~~~~~~~~ - # - # Finally, we will use the Adam optimizer for the policy and value network, - # with the same learning rate for both. 
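+# The ``get_env_stats`` helper defined earlier runs ``init_env_steps`` random
+# steps in a throwaway environment and returns the state-dict (``loc`` and
+# ``scale``) of its :class:`torchrl.envs.transforms.ObservationNorm` transform.
+# As a hedged sketch of what it does (the transform index follows the order
+# used in ``make_transformed_env``):
+#
+# .. code-block:: python
+#
+#    proof_env = make_transformed_env(make_env())
+#    proof_env.transform[2].init_stats(init_env_steps)     # fills loc / scale
+#    transform_state_dict = proof_env.transform[2].state_dict()
+#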
+transform_state_dict = get_env_stats() - optimizer = optim.Adam(loss_module.parameters(), lr=lr, weight_decay=weight_decay) - total_collection_steps = total_frames // frames_per_batch +############################################################################### +# Models: policy and q-value network +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer, T_max=total_collection_steps - ) +actor, qnet = make_ddpg_actor( + transform_state_dict=transform_state_dict, + device=device, +) +if device == torch.device("cpu"): + actor.share_memory() - ############################################################################### - # Time to train the policy - # ------------------------ - # - # The training loop is pretty straightforward now that we have built all the - # modules we need. - # - rewards = [] - rewards_eval = [] +############################################################################### +# Loss module +# ~~~~~~~~~~~ +# We build our loss module with the actor and qnet we've just created. +# Because we have target parameters to update, we _must_ create a target network +# updater. +# +loss_module = DDPGLoss(actor, qnet) +# let's use the TD(lambda) estimator! +loss_module.make_value_estimator(ValueEstimators.TDLambda) +target_net_updater = SoftUpdate(loss_module, eps=0.98) +target_net_updater.init_() + +############################################################################### +# The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` +# exploration module: + +actor_model_explore = OrnsteinUhlenbeckProcessWrapper( + actor, + annealing_num_steps=annealing_frames, +).to(device) +if device == torch.device("cpu"): + actor_model_explore.share_memory() + +############################################################################### +# Parallel environment creation +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# We pass the stats computed earlier to normalize the output of our +# environment: - # Main loop - norm_factor_training = ( - sum(gamma**i for i in range(n_steps_forward)) if n_steps_forward else 1 - ) +create_env_fn = parallel_env_constructor( + transform_state_dict=transform_state_dict, +) - collected_frames = 0 - pbar = tqdm.tqdm(total=total_frames) - r0 = None - for i, tensordict in enumerate(collector): +############################################################################### +# Data collector +# ~~~~~~~~~~~~~~ +# +# TorchRL provides specialized classes to help you collect data by executing +# the policy in the environment. These "data collectors" iteratively compute +# the action to be executed at a given time, then execute a step in the +# environment and reset it when required. +# Data collectors are designed to help developers have a tight control +# on the number of frames per batch of data, on the (a)sync nature of this +# collection and on the resources allocated to the data collection (e.g. GPU, +# number of workers etc). +# +# Here we will use +# :class:`torchrl.collectors.MultiaSyncDataCollector`, a data collector that +# will be executed in an async manner (i.e. data will be collected while +# the policy is being optimized). With the :class:`MultiaSyncDataCollector`, +# multiple workers are running rollouts separately. When a batch is asked, it +# is gathered from the first worker that can provide it. 
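+#
+# For reference, the single-process analogue would be a
+# :class:`torchrl.collectors.SyncDataCollector` iterated directly in the
+# training process. The following is only an illustrative sketch (it mirrors
+# the arguments used further down), not what this tutorial actually runs:
+#
+# .. code-block:: python
+#
+#    collector = SyncDataCollector(
+#        create_env_fn,
+#        actor_model_explore,
+#        total_frames=total_frames,
+#        frames_per_batch=frames_per_batch,
+#        max_frames_per_traj=max_frames_per_traj,
+#    )
+#    for batch in collector:
+#        ...  # extend the replay buffer and run the optimization steps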
+# +# The parameters to specify are: +# +# - the list of environment creation functions, +# - the policy, +# - the total number of frames before the collector is considered empty, +# - the maximum number of frames per trajectory (useful for non-terminating +# environments, like dm_control ones). +# +# One should also pass: +# +# - the number of frames in each batch collected, +# - the number of random steps executed independently from the policy, +# - the devices used for policy execution +# - the devices used to store data before the data is passed to the main +# process. +# +# Collectors also accept post-processing hooks. +# For instance, the :class:`torchrl.data.postprocs.MultiStep` class passed as +# ``postproc`` makes it so that the rewards of the ``n`` upcoming steps are +# summed (with some discount factor) and the next observation is changed to +# be the n-step forward observation. One could pass other transforms too: +# using :class:`tensordict.nn.TensorDictModule` and +# :class:`tensordict.nn.TensorDictSequential` we can seamlessly append a +# wide range of transforms to our collector. + +if n_steps_forward > 0: + multistep = MultiStep(n_steps=n_steps_forward, gamma=gamma) +else: + multistep = None + +collector = MultiaSyncDataCollector( + create_env_fn=[create_env_fn, create_env_fn], + policy=actor_model_explore, + total_frames=total_frames, + max_frames_per_traj=max_frames_per_traj, + frames_per_batch=frames_per_batch, + init_random_frames=init_random_frames, + reset_at_each_iter=False, + postproc=multistep, + split_trajs=True, + device=device, # device for execution + storing_device=device, # device where data will be stored and passed + update_at_each_batch=False, + exploration_mode="random", +) + +collector.set_seed(seed) + +############################################################################### +# Replay buffer +# ~~~~~~~~~~~~~ +# - # update weights of the inference policy - collector.update_policy_weights_() +replay_buffer = make_replay_buffer(buffer_size=buffer_size, batch_size=batch_size, prefetch=3) - if r0 is None: - r0 = tensordict["next", "reward"].mean().item() - pbar.update(tensordict.numel()) +############################################################################### +# Recorder +# ~~~~~~~~ - # extend the replay buffer with the new data - current_frames = tensordict.numel() - collected_frames += current_frames - replay_buffer.extend(tensordict.cpu()) +recorder = make_recorder(actor_model_explore, transform_state_dict) + +############################################################################### +# Optimizer +# ~~~~~~~~~ +# +# Finally, we will use the Adam optimizer for the policy and value network, +# with the same learning rate for both. 
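+#
+# .. note::
+#   Should different learning rates be needed for the policy and the value
+#   network, one option is to build two parameter groups. The snippet below is
+#   only a sketch: it assumes that the parameter names exposed by the loss
+#   module contain the substrings ``"actor"`` and ``"value"``, which may vary
+#   across versions.
+#
+#   .. code-block:: python
+#
+#      actor_params = [p for n, p in loss_module.named_parameters() if "actor" in n]
+#      value_params = [p for n, p in loss_module.named_parameters() if "value" in n]
+#      optimizer = optim.Adam(
+#          [{"params": actor_params, "lr": 1e-4}, {"params": value_params, "lr": 1e-3}],
+#          weight_decay=weight_decay,
+#      )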
+ +optimizer = optim.Adam(loss_module.parameters(), lr=lr, weight_decay=weight_decay) +total_collection_steps = total_frames // frames_per_batch - # optimization steps - if collected_frames >= init_random_frames: - for _ in range(update_to_data): - # sample from replay buffer - sampled_tensordict = replay_buffer.sample(batch_size).clone() - - # Compute loss - loss_dict = loss_module(sampled_tensordict) - - # optimize - loss_val = sum( - value for key, value in loss_dict.items() if key.startswith("loss") - ) - loss_val.backward() - optimizer.step() - optimizer.zero_grad() - - # update priority - if prb: - replay_buffer.update_tensordict_priority(sampled_tensordict) - # update target network - target_net_updater.step() - - rewards.append( - ( - i, - tensordict["next", "reward"].mean().item() - / norm_factor_training - / frame_skip, +scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=total_collection_steps +) + +############################################################################### +# Time to train the policy +# ------------------------ +# +# The training loop is pretty straightforward now that we have built all the +# modules we need. +# + +rewards = [] +rewards_eval = [] + +# Main loop +norm_factor_training = ( + sum(gamma**i for i in range(n_steps_forward)) if n_steps_forward else 1 +) + +collected_frames = 0 +pbar = tqdm.tqdm(total=total_frames) +r0 = None +for i, tensordict in enumerate(collector): + + # update weights of the inference policy + collector.update_policy_weights_() + + if r0 is None: + r0 = tensordict["next", "reward"].mean().item() + pbar.update(tensordict.numel()) + + # extend the replay buffer with the new data + current_frames = tensordict.numel() + collected_frames += current_frames + try: + replay_buffer.extend(tensordict.cpu()) + except Exception as err: + print("iteration", i) + print(replay_buffer._storage._storage) + print(tensordict) + raise err + + # optimization steps + if collected_frames >= init_random_frames: + for _ in range(update_to_data): + # sample from replay buffer + sampled_tensordict = replay_buffer.sample(batch_size).clone() + + # Compute loss + loss_dict = loss_module(sampled_tensordict) + + # optimize + loss_val = sum( + value for key, value in loss_dict.items() if key.startswith("loss") ) + loss_val.backward() + optimizer.step() + optimizer.zero_grad() + + # update priority + if prb: + replay_buffer.update_tensordict_priority(sampled_tensordict) + # update target network + target_net_updater.step() + + rewards.append( + ( + i, + tensordict["next", "reward"].mean().item() + / norm_factor_training + / frame_skip, ) - td_record = recorder(None) - if td_record is not None: - rewards_eval.append((i, td_record["r_evaluation"].item())) - if len(rewards_eval): - pbar.set_description( - f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}" - ) + ) + td_record = recorder(None) + if td_record is not None: + rewards_eval.append((i, td_record["r_evaluation"].item())) + if len(rewards_eval): + pbar.set_description( + f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}" + ) + + # update the exploration strategy + actor_model_explore.step(current_frames) + if collected_frames >= init_random_frames: + scheduler.step() + +collector.shutdown() +del collector - # update the exploration strategy - actor_model_explore.step(current_frames) - if collected_frames >= init_random_frames: - scheduler.step() - - collector.shutdown() - del 
collector - - ############################################################################### - # Experiment results - # ------------------ - # - # We make a simple plot of the average rewards during training. We can observe - # that our policy learned quite well to solve the task. - # - # **Note**: As already mentioned above, to get a more reasonable performance, - # use a greater value for ``total_frames`` e.g. 1M. - - plt.figure() - plt.plot(*zip(*rewards), label="training") - plt.plot(*zip(*rewards_eval), label="eval") - plt.legend() - plt.xlabel("iter") - plt.ylabel("reward") - plt.tight_layout() - - ############################################################################### - # Conclusion - # ---------- - # - # In this tutorial, we have learnt how to code a loss module in TorchRL given - # the concrete example of DDPG. - # - # The key takeaways are: - # - # - How to use the :class:`torchrl.objectives.LossModule` class to register components; - # - How to use (or not) a target network, and how to update its parameters; - # - How to create an optimizer associated with a loss module. +############################################################################### +# Experiment results +# ------------------ +# +# We make a simple plot of the average rewards during training. We can observe +# that our policy learned quite well to solve the task. +# +# **Note**: As already mentioned above, to get a more reasonable performance, +# use a greater value for ``total_frames`` e.g. 1M. + +plt.figure() +plt.plot(*zip(*rewards), label="training") +plt.plot(*zip(*rewards_eval), label="eval") +plt.legend() +plt.xlabel("iter") +plt.ylabel("reward") +plt.tight_layout() + +############################################################################### +# Conclusion +# ---------- +# +# In this tutorial, we have learnt how to code a loss module in TorchRL given +# the concrete example of DDPG. +# +# The key takeaways are: +# +# - How to use the :class:`torchrl.objectives.LossModule` class to register components; +# - How to use (or not) a target network, and how to update its parameters; +# - How to create an optimizer associated with a loss module. 
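+#
+# As a closing illustration of the target-network update mentioned above, here
+# is a hedged, framework-agnostic sketch of what a single ``SoftUpdate`` step
+# amounts to for every (parameter, target parameter) pair, ``eps`` being the
+# retention factor passed at construction time:
+#
+# .. code-block:: python
+#
+#    with torch.no_grad():
+#        for param, target_param in zip(params, target_params):
+#            target_param.data.mul_(eps).add_(param.data, alpha=1 - eps)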
# From 6d2ff4b9a9711b2499fcc18886463df16e1c5d2d Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 16:01:56 +0100 Subject: [PATCH 55/89] amend --- tutorials/sphinx-tutorials/coding_ddpg.py | 14 ++---- tutorials/sphinx-tutorials/coding_dqn.py | 57 +++++++++++++---------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index dab9d740216..68e4a9e0bdb 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -67,7 +67,7 @@ from tensordict.nn import TensorDictModule from tensordict.tensordict import TensorDict, TensorDictBase from torch import nn, optim -from torchrl.collectors import MultiaSyncDataCollector, SyncDataCollector +from torchrl.collectors import MultiaSyncDataCollector from torchrl.data import CompositeSpec, TensorDictReplayBuffer from torchrl.data.postprocs import MultiStep from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler @@ -1010,7 +1010,9 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # ~~~~~~~~~~~~~ # -replay_buffer = make_replay_buffer(buffer_size=buffer_size, batch_size=batch_size, prefetch=3) +replay_buffer = make_replay_buffer( + buffer_size=buffer_size, batch_size=batch_size, prefetch=3 +) ############################################################################### # Recorder @@ -1063,13 +1065,7 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # extend the replay buffer with the new data current_frames = tensordict.numel() collected_frames += current_frames - try: - replay_buffer.extend(tensordict.cpu()) - except Exception as err: - print("iteration", i) - print(replay_buffer._storage._storage) - print(tensordict) - raise err + replay_buffer.extend(tensordict.cpu()) # optimization steps if collected_frames >= init_random_frames: diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 15d98ad092a..c54518fa547 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -610,6 +610,11 @@ def get_loss_module(actor, gamma): trainer.register_op("post_optim", target_net_updater.step) ############################################################################### +# .. note:: +# It is possible to link multiple optimizers to the trainer if needed. +# In this case, each optimizer will be tied to a field in the loss dictionary. +# Check the :class:`torchrl.trainers.OptimizerHook` to learn more. +# # Here we are, ready to train our algorithm! A simple call to # ``trainer.train()`` and we'll be getting our results logged in. # @@ -621,26 +626,28 @@ def get_loss_module(actor, gamma): def print_csv_files_in_folder(folder_path): """ - Find all CSV files in a folder and print the first 10 lines of each file. + Find all CSV files in a folder and return the first 10 lines of each file as a string. Args: folder_path (str): The relative path to the folder. Returns: - list: A list of all CSV files in the folder. + str: A string containing the first 10 lines of each CSV file in the folder. 
""" csv_files = [] + output_str = "" for file in os.listdir(folder_path): if file.endswith(".csv"): csv_files.append(os.path.join(folder_path, file)) for csv_file in csv_files: - print(f"File: {csv_file}") + output_str += f"File: {csv_file}\n" with open(csv_file, "r") as f: for i, line in enumerate(f): if i == 10: break - print(line.strip()) - print("\n") + output_str += line.strip() + "\n" + output_str += "\n" + return output_str print_csv_files_in_folder(exp_name) @@ -649,36 +656,38 @@ def print_csv_files_in_folder(folder_path): # Conclusion and possible improvements # ------------------------------------ # -# In this tutorial we have learnt: +# In this tutorial we have learned: # -# - How to train a policy that read pixel-based states, what transforms to -# include and how to normalize the data; -# - How to create a policy that picks up the action with the highest value -# with :class:`torchrl.modules.QValueNetwork`; +# - How to write a Trainer, including building its components and registering +# them in the trainer; +# - How to code a DQN algorithm, including how to create a policy that picks +# up the action with the highest value with +# :class:`torchrl.modules.QValueNetwork`; # - How to build a multiprocessed data collector; -# - How to train a DQN with TD(:math:`\lambda`) returns. # -# We have seen that using TD(:math:`\lambda`) greatly improved the performance -# of DQN. Other possible improvements could include: +# Possible improvements to this tutorial could include: # -# - Using the Multi-Step post-processing. Multi-step will project an action -# to the nth following step, and create a discounted sum of the rewards in -# between. This trick can make the algorithm noticebly less myopic. To use -# this, simply create the collector with +# - Using the :class:`torchrl.data.MultiStep` +# post-processing. Multi-step will project an action +# to the :math:`n^{th}` following step, and create a discounted sum of the +# rewards in between. This trick can make the algorithm noticeably less +# myopic (although the reward is then biased). To use this, simply +# create the collector with # -# from torchrl.data.postprocs.postprocs import MultiStep -# collector = CollectorClass(..., postproc=MultiStep(gamma, n)) +# >>> from torchrl.data.postprocs.postprocs import MultiStep +# >>> collector = CollectorClass(..., postproc=MultiStep(gamma, n)) # # where ``n`` is the number of looking-forward steps. Pay attention to the # fact that the ``gamma`` factor has to be corrected by the number of # steps till the next observation when being passed to # ``vec_td_lambda_advantage_estimate``: # -# gamma = gamma ** tensordict["steps_to_next_obs"] +# >>> gamma = gamma ** tensordict["steps_to_next_obs"] +# # - A prioritized replay buffer could also be used. This will give a # higher priority to samples that have the worst value accuracy. -# - A distributional loss (see ``torchrl.objectives.DistributionalDQNLoss`` +# Learn more on the `replay buffer section `_ +# of the documentation. +# - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` # for more information). -# - More fancy exploration techniques, such as NoisyLinear layers and such -# (check ``torchrl.modules.NoisyLinear``, which is fully compatible with the -# ``MLP`` class used in our Dueling DQN). +# - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. 
From 1411cf48de6d6b2b6438ce3f8b0b654ba08b5581 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 29 Mar 2023 16:32:35 +0100 Subject: [PATCH 56/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index c54518fa547..6abdae9d60d 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -650,7 +650,7 @@ def print_csv_files_in_folder(folder_path): return output_str -print_csv_files_in_folder(exp_name) +print_csv_files_in_folder("csv_logs/" + exp_name) ############################################################################### # Conclusion and possible improvements From 259a1beb8d9799d117d035c4c57c230af1d3b702 Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 30 Mar 2023 12:57:17 +0100 Subject: [PATCH 57/89] amend --- torchrl/collectors/collectors.py | 6 +- torchrl/data/replay_buffers/replay_buffers.py | 16 +-- torchrl/data/replay_buffers/storages.py | 37 +++++- torchrl/envs/transforms/transforms.py | 60 ++++++++-- .../modules/tensordict_module/exploration.py | 23 ++-- torchrl/objectives/ddpg.py | 6 - torchrl/objectives/utils.py | 9 +- torchrl/trainers/trainers.py | 4 +- tutorials/sphinx-tutorials/coding_ddpg.py | 109 ++++++++++-------- 9 files changed, 174 insertions(+), 96 deletions(-) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index 7bd1f92a1d1..0c77db4ccb3 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -569,15 +569,18 @@ def __init__( traj_ids, ) + with torch.no_grad(): + self._tensordict_out = env.fake_tensordict() if ( hasattr(self.policy, "spec") and self.policy.spec is not None and all(v is not None for v in self.policy.spec.values()) and set(self.policy.spec.keys(True, True)) == set(self.policy.out_keys) + and any(key not in self._tensordict_out.keys(isinstance(key, tuple)) for key in self.policy.spec) ): # if policy spec is non-empty, all the values are not None and the keys # match the out_keys we assume the user has given all relevant information - self._tensordict_out = env.fake_tensordict().to_tensordict() + # the policy could have more keys than the env: self._tensordict_out.update(self.policy.spec.zero()) self._tensordict_out = ( self._tensordict_out.unsqueeze(-1) @@ -589,7 +592,6 @@ def __init__( # determine the relevant keys with which to pre-populate _tensordict_out. # See #505 for additional context. 
with torch.no_grad(): - self._tensordict_out = env.fake_tensordict() self._tensordict_out = self._tensordict_out.to(self.device) self._tensordict_out = self.policy(self._tensordict_out).unsqueeze(-1) self._tensordict_out = ( diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py index eb1ed4dbe15..cf04db52ea7 100644 --- a/torchrl/data/replay_buffers/replay_buffers.py +++ b/torchrl/data/replay_buffers/replay_buffers.py @@ -11,7 +11,7 @@ import torch from tensordict.tensordict import LazyStackedTensorDict, TensorDict, TensorDictBase -from tensordict.utils import expand_right +from tensordict.utils import expand_as_right from torchrl.data.utils import DEVICE_TYPING @@ -755,19 +755,7 @@ def sample( data, info = super().sample(batch_size, return_info=True) if include_info in (True, None): for k, v in info.items(): - data.set(k, torch.tensor(v, device=data.device)) - if "_batch_size" in data.keys(): - # we need to reset the batch-size - shape = data.pop("_batch_size") - shape = shape[0] - shape = torch.Size([data.shape[0], *shape]) - # we may need to update some values in the data - for key, value in data.items(): - if value.ndim >= len(shape): - continue - value = expand_right(value, shape) - data.set(key, value) - data.batch_size = shape + data.set(k, expand_as_right(torch.tensor(v, device=data.device), data)) if return_info: return data, info return data diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py index 7a789260e48..d96e2498f6b 100644 --- a/torchrl/data/replay_buffers/storages.py +++ b/torchrl/data/replay_buffers/storages.py @@ -14,6 +14,7 @@ from tensordict.memmap import MemmapTensor from tensordict.prototype import is_tensorclass from tensordict.tensordict import is_tensor_collection, TensorDict, TensorDictBase +from tensordict.utils import expand_right from torchrl._utils import _CKPT_BACKEND, VERBOSE from torchrl.data.replay_buffers.utils import INT_CLASSES @@ -423,10 +424,42 @@ def _mem_map_tensor_as_tensor(mem_map_tensor: MemmapTensor) -> torch.Tensor: return mem_map_tensor._tensor +def _reset_batch_size(x): + """Resets the batch size of a tensordict. + + In some cases we save the original shape of the tensordict as a tensor (or memmap tensor). + + This function will read that tensor, extract its items and reset the shape + of the tensordict to it. If items have an incompatible shape (e.g. "index") + they will be expanded to the right to match it. 
+ + """ + shape = x.pop("_batch_size", None) + if shape is not None: + # we need to reset the batch-size + if isinstance(shape, MemmapTensor): + shape = shape.as_tensor() + locked = x.is_locked + if locked: + x.unlock_() + shape = [s.item() for s in shape[0]] + shape = torch.Size([x.shape[0], *shape]) + # we may need to update some values in the data + for key, value in x.items(): + if value.ndim >= len(shape): + continue + value = expand_right(value, shape) + x.set(key, value) + x.batch_size = shape + if locked: + x.lock_() + return x + + def _collate_list_tensordict(x): out = torch.stack(x, 0) if isinstance(out, TensorDictBase): - return out.to_tensordict() + return _reset_batch_size(out.to_tensordict()) return out @@ -436,7 +469,7 @@ def _collate_list_tensors(*x): def _collate_contiguous(x): if isinstance(x, TensorDictBase): - return x.to_tensordict() + return _reset_batch_size(x).to_tensordict() return x.clone() diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py index 6a0dd6be2b8..08f9dfe5c46 100644 --- a/torchrl/envs/transforms/transforms.py +++ b/torchrl/envs/transforms/transforms.py @@ -2602,6 +2602,13 @@ class VecNorm(Transform): default: 0.99 eps (number, optional): lower bound of the running standard deviation (for numerical underflow). Default is 1e-4. + shapes (List[torch.Size], optional): if provided, represents the shape + of each in_keys. Its length must match the one of ``in_keys``. + Each shape must match the trailing dimension of the corresponding + entry. + If not, the feature dimensions of the entry (ie all dims that do + not belong to the tensordict batch-size) will be considered as + feature dimension. Examples: >>> from torchrl.envs.libs.gym import GymEnv @@ -2629,6 +2636,7 @@ def __init__( lock: mp.Lock = None, decay: float = 0.9999, eps: float = 1e-4, + shapes: List[torch.Size] = None, ) -> None: if lock is None: lock = mp.Lock() @@ -2656,8 +2664,14 @@ def __init__( self.lock = lock self.decay = decay + self.shapes = shapes self.eps = eps + def _key_str(self, key): + if not isinstance(key, str): + key = "_".join(key) + return key + def _call(self, tensordict: TensorDictBase) -> TensorDictBase: if self.lock is not None: self.lock.acquire() @@ -2681,17 +2695,44 @@ def _call(self, tensordict: TensorDictBase) -> TensorDictBase: forward = _call def _init(self, tensordict: TensorDictBase, key: str) -> None: - if self._td is None or key + "_sum" not in self._td.keys(): - td_view = tensordict.view(-1) - td_select = td_view[0] - d = {key + "_sum": torch.zeros_like(td_select.get(key))} - d.update({key + "_ssq": torch.zeros_like(td_select.get(key))}) + key_str = self._key_str(key) + if self._td is None or key_str + "_sum" not in self._td.keys(): + if key is not key_str and key_str in tensordict.keys(): + raise RuntimeError( + f"Conflicting key names: {key_str} from VecNorm and input tensordict keys." 
+ ) + if self.shapes is None: + td_view = tensordict.view(-1) + td_select = td_view[0] + item = td_select.get(key) + d = {key_str + "_sum": torch.zeros_like(item)} + d.update({key_str + "_ssq": torch.zeros_like(item)}) + else: + idx = 0 + for in_key in self.in_keys: + if in_key != key: + idx += 1 + else: + break + shape = self.shapes[idx] + item = tensordict.get(key) + d = { + key_str + + "_sum": torch.zeros(shape, device=item.device, dtype=item.dtype) + } + d.update( + { + key_str + + "_ssq": torch.zeros( + shape, device=item.device, dtype=item.dtype + ) + } + ) + d.update( { - key - + "_count": torch.zeros( - 1, device=td_select.get(key).device, dtype=torch.float - ) + key_str + + "_count": torch.zeros(1, device=item.device, dtype=torch.float) } ) if self._td is None: @@ -2702,6 +2743,7 @@ def _init(self, tensordict: TensorDictBase, key: str) -> None: pass def _update(self, key, value, N) -> torch.Tensor: + key = self._key_str(key) _sum = self._td.get(key + "_sum") _ssq = self._td.get(key + "_ssq") _count = self._td.get(key + "_count") diff --git a/torchrl/modules/tensordict_module/exploration.py b/torchrl/modules/tensordict_module/exploration.py index 4d8feaef8b8..8eb0bf6dc54 100644 --- a/torchrl/modules/tensordict_module/exploration.py +++ b/torchrl/modules/tensordict_module/exploration.py @@ -178,14 +178,21 @@ def __init__( self.register_buffer("std", torch.tensor([std])) self.register_buffer("sigma", torch.tensor([sigma_init])) self.action_key = action_key - self.spec = ( - spec - if spec is not None - else policy.spec - if hasattr(policy, "spec") - else None - ) + self.out_keys = list(self.td_module.out_keys) + if spec is not None: + if not isinstance(spec, CompositeSpec) and len(self.out_keys) == 1: + spec = CompositeSpec({self.out_keys[0]: spec}) + elif not isinstance(spec, CompositeSpec): + raise ValueError(f"Cannot infer which key the spec is made for, got spec={spec} and out_keys={self.out_keys}.") + self._spec = spec + elif hasattr(self.td_module, "_spec"): + self._spec = self.td_module._spec.clone() + else: + self._spec = CompositeSpec({key: None for key in policy.in_keys}) + self.safe = safe + if self.safe: + self.register_forward_hook(_forward_hook_safe_action) def step(self, frames: int = 1) -> None: """A step of sigma decay. @@ -341,7 +348,7 @@ def __init__( self.register_buffer("eps", torch.tensor([eps_init])) self.out_keys = list(self.td_module.out_keys) + self.ou.out_keys self._spec = CompositeSpec( - **self.td_module._spec, **{key: None for key in self.ou.out_keys} + **self.td_module._spec, **{key: None for key in self.ou.out_keys}, shape=self.td_module._spec.shape ) if len(set(self.out_keys)) != len(self.out_keys): raise RuntimeError(f"Got multiple identical output keys: {self.out_keys}") diff --git a/torchrl/objectives/ddpg.py b/torchrl/objectives/ddpg.py index c1cacd7349e..917f5df44c6 100644 --- a/torchrl/objectives/ddpg.py +++ b/torchrl/objectives/ddpg.py @@ -99,12 +99,6 @@ def forward(self, input_tensordict: TensorDictBase) -> TensorDict: a tuple of 2 tensors containing the DDPG loss. 
""" - if not input_tensordict.device == self.device: - raise RuntimeError( - f"Got device={input_tensordict.device} but " - f"actor_network.device={self.device} (self.device={self.device})" - ) - loss_value, td_error, pred_val, target_value = self._loss_value( input_tensordict, ) diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 9d393a51d05..8b72f1f6620 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -242,15 +242,18 @@ def __repr__(self) -> str: class SoftUpdate(TargetNetUpdater): - """A soft-update class for target network update in Double DQN/DDPG. + r"""A soft-update class for target network update in Double DQN/DDPG. This was proposed in "CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING", https://arxiv.org/pdf/1509.02971.pdf Args: loss_module (DQNLoss or DDPGLoss): loss module where the target network should be updated. eps (scalar): epsilon in the update equation: - param = prev_param * eps + new_param * (1-eps) - default: 0.999 + .. math:: + + \theta_t = \theta_{t-1} * \epsilon + \theta_t * (1-\epsilon) + + Defaults to 0.999 """ def __init__( diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 52d58542442..ce3516f55db 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -1114,7 +1114,7 @@ class Recorder(TrainerHookBase): each iteration, otherwise the frame count can be underestimated. For logging, this parameter is important to normalize the reward. Finally, to compare different runs with different frame_skip, - one must normalize the frame count and rewards. Default is 1. + one must normalize the frame count and rewards. Defaults to ``1``. policy_exploration (ProbabilisticTDModule): a policy instance used for @@ -1151,7 +1151,7 @@ def __init__( *, record_interval: int, record_frames: int, - frame_skip: int, + frame_skip: int = 1, policy_exploration: TensorDictModule, environment: EnvBase = None, exploration_mode: str = "random", diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 68e4a9e0bdb..503a53d48d1 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -69,7 +69,6 @@ from torch import nn, optim from torchrl.collectors import MultiaSyncDataCollector from torchrl.data import CompositeSpec, TensorDictReplayBuffer -from torchrl.data.postprocs import MultiStep from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler from torchrl.data.replay_buffers.storages import LazyMemmapStorage from torchrl.envs import ( @@ -342,7 +341,6 @@ def _loss_value( tensordict, target_params=target_params ).squeeze(-1) - # td_error = pred_val - target_value loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) td_error = (pred_val - target_value).pow(2) @@ -790,50 +788,65 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # The backend can be gym or dm_control backend = "gym" -exp_name = "cheetah" - -# frame_skip batches multiple step together with a single action -# If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to -# be adjusted to have a consistent total number of frames collected across -# experiments. +############################################################################### +# .. note:: +# ``frame_skip`` batches multiple step together with a single action +# If > 1, the other frame counts (e.g. 
frames_per_batch, total_frames) need to +# be adjusted to have a consistent total number of frames collected across +# experiments. This is important as raising the frame-skip but keeping the +# total number of frames unchanged may seem like cheating: all things compared, +# a dataset of 10M elements collected with a frame-skip of 2 and another with +# a frame-skip of 1 actually have a ratio of interactions with the environment +# of 2:1! +# frame_skip = 2 from_pixels = False + +############################################################################### # Scaling the reward helps us control the signal magnitude for a more # efficient learning. reward_scaling = 5.0 +############################################################################### # Number of random steps used as for stats computation using ObservationNorm init_env_steps = 1000 +############################################################################### # Exploration: Number of frames before OU noise becomes null annealing_frames = 1000000 // frame_skip ############################################################################### # Collection # ~~~~~~~~~~ - +# # We will execute the policy on cuda if available device = ( torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") ) +############################################################################### # Number of environments in each data collector env_per_collector = 2 +############################################################################### # Total frames we will use during training. Scale up to 500K - 1M for a more # meaningful training total_frames = 10000 // frame_skip +############################################################################### # Number of frames returned by the collector at each iteration of the outer loop. -# We expect batches from the collector to have a shape [env_per_collector, frames_per_batch // env_per_collector] -frames_per_batch = env_per_collector * 1000 // frame_skip +# We expect batches from the collector to have a shape +# ``[env_per_collector, traj_len]`` where ``traj_len`` is the time dimension +# of the samples. TorchRL's datacollectors are given a certain number of +# environment and a number of frames to deliver in each batch. 
We can +# We can easily calculate how many frames we need to ask to the collectors: +traj_len = 50 # time length of the batches +frames_per_batch = env_per_collector * traj_len // frame_skip max_frames_per_traj = 1000 // frame_skip init_random_frames = 0 -# We'll be using the MultiStep class to have a less myopic representation of -# upcoming states -n_steps_forward = 3 -# record every 10 batch collected +############################################################################### +# We will be recording the performance every 10 batch collected record_interval = 10 ############################################################################### @@ -843,14 +856,22 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): lr = 5e-4 weight_decay = 0.0 # UTD: Number of iterations of the inner loop -update_to_data = 32 -batch_size = 128 +update_to_data = 4 + +############################################################################### +# Because we'll be sampling from a replay buffer that stores sub-trajectories +# of length ``traj_len``, we need to compute how large the batch-size +# is going to be based on the total number of elements we expect to find +# divided by the trajectory length: +batch_size = 128 // traj_len * frame_skip + ############################################################################### # Model # ~~~~~ gamma = 0.99 +lmbda = 0.2 tau = 0.005 # Decay factor for the target network # Network specs @@ -860,12 +881,11 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): ############################################################################### # Replay buffer # ~~~~~~~~~~~~~ - -# If True, a Prioritized replay buffer will be used -prb = True +# If ``prb=True``, a Prioritized replay buffer will be used +prb = False +############################################################################### # Number of frames stored in the buffer -traj_len_collector = frames_per_batch // env_per_collector -buffer_size = min(total_frames, 1_000_000 // traj_len_collector) +buffer_size = min(total_frames, 1_000_000 // traj_len) buffer_scratch_dir = "/tmp/" seed = 0 @@ -911,11 +931,20 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # loss_module = DDPGLoss(actor, qnet) # let's use the TD(lambda) estimator! -loss_module.make_value_estimator(ValueEstimators.TDLambda) -target_net_updater = SoftUpdate(loss_module, eps=0.98) +loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=gamma, lmbda=lmbda) +target_net_updater = SoftUpdate(loss_module, eps=1 - tau) target_net_updater.init_() ############################################################################### +# .. note:: +# Off-policy usually dictates a TD(0) estimator. Here, we use a TD(:math:`\lambda`) +# estimator, which will introduce some bias as the trajectory that follows +# a certain state has been collected with an outdated policy. +# This trick, as the multi-step trick that can be used during data collection, +# are alternative versions of "hacks" that we usually find to work well in +# practice despite the fact that they introduce some bias in the return +# estimates. +# # The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` # exploration module: @@ -973,19 +1002,6 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # - the devices used to store data before the data is passed to the main # process. # -# Collectors also accept post-processing hooks. 
-# For instance, the :class:`torchrl.data.postprocs.MultiStep` class passed as -# ``postproc`` makes it so that the rewards of the ``n`` upcoming steps are -# summed (with some discount factor) and the next observation is changed to -# be the n-step forward observation. One could pass other transforms too: -# using :class:`tensordict.nn.TensorDictModule` and -# :class:`tensordict.nn.TensorDictSequential` we can seamlessly append a -# wide range of transforms to our collector. - -if n_steps_forward > 0: - multistep = MultiStep(n_steps=n_steps_forward, gamma=gamma) -else: - multistep = None collector = MultiaSyncDataCollector( create_env_fn=[create_env_fn, create_env_fn], @@ -995,7 +1011,6 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): frames_per_batch=frames_per_batch, init_random_frames=init_random_frames, reset_at_each_iter=False, - postproc=multistep, split_trajs=True, device=device, # device for execution storing_device=device, # device where data will be stored and passed @@ -1046,9 +1061,6 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): rewards_eval = [] # Main loop -norm_factor_training = ( - sum(gamma**i for i in range(n_steps_forward)) if n_steps_forward else 1 -) collected_frames = 0 pbar = tqdm.tqdm(total=total_frames) @@ -1071,15 +1083,13 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): if collected_frames >= init_random_frames: for _ in range(update_to_data): # sample from replay buffer - sampled_tensordict = replay_buffer.sample(batch_size).clone() + sampled_tensordict = replay_buffer.sample() # Compute loss loss_dict = loss_module(sampled_tensordict) # optimize - loss_val = sum( - value for key, value in loss_dict.items() if key.startswith("loss") - ) + loss_val = loss_dict["loss_actor"] + loss_dict["loss_value"] loss_val.backward() optimizer.step() optimizer.zero_grad() @@ -1093,9 +1103,7 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): rewards.append( ( i, - tensordict["next", "reward"].mean().item() - / norm_factor_training - / frame_skip, + tensordict["next", "reward"].mean().item() / frame_skip, ) ) td_record = recorder(None) @@ -1103,7 +1111,7 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): rewards_eval.append((i, td_record["r_evaluation"].item())) if len(rewards_eval): pbar.set_description( - f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}" + f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}, shape={sampled_tensordict.shape}" ) # update the exploration strategy @@ -1141,7 +1149,8 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # # The key takeaways are: # -# - How to use the :class:`torchrl.objectives.LossModule` class to register components; +# - How to use the :class:`torchrl.objectives.LossModule` class to code up a new +# loss component; # - How to use (or not) a target network, and how to update its parameters; # - How to create an optimizer associated with a loss module. 
# From bad0d6a22410e871af57d0c0f40c9e18a1365f2a Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 30 Mar 2023 16:10:03 +0100 Subject: [PATCH 58/89] init --- test/test_exploration.py | 201 +++++++++++++----- torchrl/collectors/collectors.py | 37 +++- torchrl/data/tensor_specs.py | 5 +- .../modules/tensordict_module/exploration.py | 66 +++++- 4 files changed, 234 insertions(+), 75 deletions(-) diff --git a/test/test_exploration.py b/test/test_exploration.py index 0ebe0a9d97d..103de211d6e 100644 --- a/test/test_exploration.py +++ b/test/test_exploration.py @@ -8,10 +8,14 @@ import pytest import torch from _utils_internal import get_available_devices +from mocking_classes import ContinuousActionVecMockEnv from scipy.stats import ttest_1samp from tensordict.tensordict import TensorDict from torch import nn + +from torchrl.collectors import SyncDataCollector from torchrl.data import BoundedTensorSpec, CompositeSpec +from torchrl.envs import SerialEnv from torchrl.envs.transforms.transforms import gSDENoise from torchrl.envs.utils import set_exploration_mode from torchrl.modules import SafeModule, SafeSequential @@ -21,7 +25,7 @@ NormalParamWrapper, ) from torchrl.modules.models.exploration import LazygSDEModule -from torchrl.modules.tensordict_module.actors import ProbabilisticActor +from torchrl.modules.tensordict_module.actors import Actor, ProbabilisticActor from torchrl.modules.tensordict_module.exploration import ( _OrnsteinUhlenbeckProcess, AdditiveGaussianWrapper, @@ -30,70 +34,122 @@ @pytest.mark.parametrize("device", get_available_devices()) -def test_ou(device, seed=0): - torch.manual_seed(seed) - td = TensorDict({"action": torch.randn(3) / 10}, batch_size=[], device=device) - ou = _OrnsteinUhlenbeckProcess(10.0, mu=2.0, x0=-4, sigma=0.1, sigma_min=0.01) +class TestOrnsteinUhlenbeckProcessWrapper: + def test_ou(self, device, seed=0): + torch.manual_seed(seed) + td = TensorDict({"action": torch.randn(3) / 10}, batch_size=[], device=device) + ou = _OrnsteinUhlenbeckProcess(10.0, mu=2.0, x0=-4, sigma=0.1, sigma_min=0.01) - tds = [] - for i in range(2000): - td = ou.add_sample(td) - tds.append(td.clone()) - td.set_("action", torch.randn(3) / 10) - if i % 1000 == 0: - td.zero_() + tds = [] + for i in range(2000): + td = ou.add_sample(td) + tds.append(td.clone()) + td.set_("action", torch.randn(3) / 10) + if i % 1000 == 0: + td.zero_() - tds = torch.stack(tds, 0) + tds = torch.stack(tds, 0) - tset, pval_acc = ttest_1samp(tds.get("action")[950:1000, 0].cpu().numpy(), 2.0) - tset, pval_reg = ttest_1samp(tds.get("action")[:50, 0].cpu().numpy(), 2.0) - assert pval_acc > 0.05 - assert pval_reg < 0.1 + tset, pval_acc = ttest_1samp(tds.get("action")[950:1000, 0].cpu().numpy(), 2.0) + tset, pval_reg = ttest_1samp(tds.get("action")[:50, 0].cpu().numpy(), 2.0) + assert pval_acc > 0.05 + assert pval_reg < 0.1 - tset, pval_acc = ttest_1samp(tds.get("action")[1950:2000, 0].cpu().numpy(), 2.0) - tset, pval_reg = ttest_1samp(tds.get("action")[1000:1050, 0].cpu().numpy(), 2.0) - assert pval_acc > 0.05 - assert pval_reg < 0.1 + tset, pval_acc = ttest_1samp(tds.get("action")[1950:2000, 0].cpu().numpy(), 2.0) + tset, pval_reg = ttest_1samp(tds.get("action")[1000:1050, 0].cpu().numpy(), 2.0) + assert pval_acc > 0.05 + assert pval_reg < 0.1 + def test_ou_wrapper(self, device, d_obs=4, d_act=6, batch=32, n_steps=100, seed=0): + torch.manual_seed(seed) + net = NormalParamWrapper(nn.Linear(d_obs, 2 * d_act)).to(device) + module = SafeModule(net, in_keys=["observation"], out_keys=["loc", "scale"]) + action_spec = 
BoundedTensorSpec(-torch.ones(d_act), torch.ones(d_act), (d_act,)) + policy = ProbabilisticActor( + spec=action_spec, + module=module, + in_keys=["loc", "scale"], + distribution_class=TanhNormal, + default_interaction_mode="random", + ).to(device) + exploratory_policy = OrnsteinUhlenbeckProcessWrapper(policy) -@pytest.mark.parametrize("device", get_available_devices()) -def test_ou_wrapper(device, d_obs=4, d_act=6, batch=32, n_steps=100, seed=0): - torch.manual_seed(seed) - net = NormalParamWrapper(nn.Linear(d_obs, 2 * d_act)).to(device) - module = SafeModule(net, in_keys=["observation"], out_keys=["loc", "scale"]) - action_spec = BoundedTensorSpec(-torch.ones(d_act), torch.ones(d_act), (d_act,)) - policy = ProbabilisticActor( - spec=action_spec, - module=module, - in_keys=["loc", "scale"], - distribution_class=TanhNormal, - default_interaction_mode="random", - ).to(device) - exploratory_policy = OrnsteinUhlenbeckProcessWrapper(policy) + tensordict = TensorDict( + batch_size=[batch], + source={ + "observation": torch.randn(batch, d_obs, device=device), + "step_count": torch.zeros(batch, device=device), + }, + device=device, + ) + out_noexp = [] + out = [] + for i in range(n_steps): + tensordict_noexp = policy(tensordict.clone()) + tensordict = exploratory_policy(tensordict.clone()) + if i == 0: + assert (tensordict[exploratory_policy.ou.steps_key] == 1).all() + else: + assert not (tensordict[exploratory_policy.ou.steps_key] == 1).all() - tensordict = TensorDict( - batch_size=[batch], - source={"observation": torch.randn(batch, d_obs, device=device)}, - device=device, - ) - out_noexp = [] - out = [] - for _ in range(n_steps): - tensordict_noexp = policy(tensordict.select("observation")) - tensordict = exploratory_policy(tensordict) - out.append(tensordict.clone()) - out_noexp.append(tensordict_noexp.clone()) - tensordict.set_("observation", torch.randn(batch, d_obs, device=device)) - out = torch.stack(out, 0) - out_noexp = torch.stack(out_noexp, 0) - assert (out_noexp.get("action") != out.get("action")).all() - assert (out.get("action") <= 1.0).all(), out.get("action").min() - assert (out.get("action") >= -1.0).all(), out.get("action").max() + out.append(tensordict.clone()) + out_noexp.append(tensordict_noexp.clone()) + tensordict.set_("observation", torch.randn(batch, d_obs, device=device)) + tensordict["step_count"] += 1 + out = torch.stack(out, 0) + out_noexp = torch.stack(out_noexp, 0) + assert (out_noexp.get("action") != out.get("action")).all() + assert (out.get("action") <= 1.0).all(), out.get("action").min() + assert (out.get("action") >= -1.0).all(), out.get("action").max() + + @pytest.mark.parametrize("parallel_spec", [True, False]) + @pytest.mark.parametrize("probabilistic", [True, False]) + def test_collector(self, device, parallel_spec, probabilistic, seed=0): + torch.manual_seed(seed) + env = SerialEnv(2, ContinuousActionVecMockEnv) + # the module must work with the action spec of a single env or a serial env + if parallel_spec: + action_spec = env.action_spec + else: + action_spec = ContinuousActionVecMockEnv().action_spec + d_act = action_spec.shape[-1] + if probabilistic: + net = NormalParamWrapper(nn.LazyLinear(2 * d_act)).to(device) + module = SafeModule( + net, + in_keys=["observation"], + out_keys=["loc", "scale"], + ) + policy = ProbabilisticActor( + module=module, + in_keys=["loc", "scale"], + distribution_class=TanhNormal, + default_interaction_mode="random", + spec=action_spec, + ).to(device) + else: + net = nn.LazyLinear(d_act).to(device) + policy = Actor( + net, 
in_keys=["observation"], out_keys=["action"], spec=action_spec + ) + + exploratory_policy = OrnsteinUhlenbeckProcessWrapper(policy) + exploratory_policy(env.reset()) + collector = SyncDataCollector( + create_env_fn=env, + policy=exploratory_policy, + frames_per_batch=100, + total_frames=1000, + ) + for _ in collector: + # check that we can run the policy + pass + return @pytest.mark.parametrize("device", get_available_devices()) -@pytest.mark.parametrize("spec_origin", ["spec", "policy", None]) class TestAdditiveGaussian: + @pytest.mark.parametrize("spec_origin", ["spec", "policy", None]) def test_additivegaussian_sd( self, device, @@ -167,6 +223,7 @@ def test_additivegaussian_sd( ) assert abs(noisy_action.std() - sigma_end) < 1e-1 + @pytest.mark.parametrize("spec_origin", ["spec", "policy", None]) def test_additivegaussian_wrapper( self, device, spec_origin, d_obs=4, d_act=6, batch=32, n_steps=100, seed=0 ): @@ -213,6 +270,42 @@ def test_additivegaussian_wrapper( if action_spec is not None: assert action_spec.is_in(out.get("action")) + @pytest.mark.parametrize("parallel_spec", [True, False]) + def test_collector(self, device, parallel_spec, seed=0): + torch.manual_seed(seed) + env = SerialEnv(2, ContinuousActionVecMockEnv) + # the module must work with the action spec of a single env or a serial env + if parallel_spec: + action_spec = env.action_spec + else: + action_spec = ContinuousActionVecMockEnv().action_spec + d_act = action_spec.shape[-1] + net = NormalParamWrapper(nn.LazyLinear(2 * d_act)).to(device) + module = SafeModule( + net, + in_keys=["observation"], + out_keys=["loc", "scale"], + ) + policy = ProbabilisticActor( + module=module, + in_keys=["loc", "scale"], + distribution_class=TanhNormal, + default_interaction_mode="random", + spec=action_spec, + ).to(device) + exploratory_policy = AdditiveGaussianWrapper(policy, safe=False) + exploratory_policy(env.reset()) + collector = SyncDataCollector( + create_env_fn=env, + policy=exploratory_policy, + frames_per_batch=100, + total_frames=1000, + ) + for _ in collector: + # check that we can run the policy + pass + return + @pytest.mark.parametrize("state_dim", [7]) @pytest.mark.parametrize("action_dim", [5, 11]) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index 5865df5b18f..ea311794212 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -31,8 +31,8 @@ _check_for_faulty_process, accept_remote_rref_udf_invocation, prod, - VERBOSE, RL_WARNINGS, + VERBOSE, ) from torchrl.collectors.utils import split_trajectories from torchrl.data.tensor_specs import TensorSpec @@ -574,32 +574,51 @@ def __init__( traj_ids, ) + with torch.no_grad(): + self._tensordict_out = env.fake_tensordict() if ( hasattr(self.policy, "spec") and self.policy.spec is not None - and all(v is not None for v in self.policy.spec.values()) - and set(self.policy.spec.keys(True, True)) == set(self.policy.out_keys) + and all( + v is not None for v in self.policy.spec.values() + ) # if a spec is None, we don't know anything about it + # and set(self.policy.spec.keys(True, True)) == set(self.policy.out_keys) + and any( + key not in self._tensordict_out.keys(isinstance(key, tuple)) + for key in self.policy.spec + ) ): # if policy spec is non-empty, all the values are not None and the keys # match the out_keys we assume the user has given all relevant information - self._tensordict_out = env.fake_tensordict().to_tensordict() - self._tensordict_out.update(self.policy.spec.zero()) + # the policy could have more 
keys than the env: + for key, spec in self.policy.spec.items(): # this may break for nested keys + if key in self._tensordict_out.keys(): + continue + if spec.ndim < self._tensordict_out.ndim: + spec = spec.expand(self._tensordict_out.shape) + self._tensordict_out.set(key, spec.zero()) self._tensordict_out = ( self._tensordict_out.unsqueeze(-1) .expand(*env.batch_size, self.frames_per_batch) - .to_tensordict() + .clone() ) - else: + elif hasattr(self.policy, "spec") and self.policy.spec is not None: + # reach this if the policy has specs and they match with the fake tensordict + self._tensordict_out = ( + self._tensordict_out.unsqueeze(-1) + .expand(*env.batch_size, self.frames_per_batch) + .clone() + ) + elif not hasattr(self.policy, "spec") or self.policy.spec is None: # otherwise, we perform a small number of steps with the policy to # determine the relevant keys with which to pre-populate _tensordict_out. # See #505 for additional context. with torch.no_grad(): - self._tensordict_out = env.fake_tensordict() self._tensordict_out = self._tensordict_out.to(self.device) self._tensordict_out = self.policy(self._tensordict_out).unsqueeze(-1) self._tensordict_out = ( self._tensordict_out.expand(*env.batch_size, self.frames_per_batch) - .to_tensordict() + .clone() .zero_() ) # in addition to outputs of the policy, we add traj_ids and step_count to diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py index e07796028b7..2a3996c8681 100644 --- a/torchrl/data/tensor_specs.py +++ b/torchrl/data/tensor_specs.py @@ -2437,7 +2437,10 @@ def clone(self) -> CompositeSpec: except RuntimeError: device = self._device return self.__class__( - {key: item.clone() for key, item in self.items()}, + { + key: item.clone() if item is not None else None + for key, item in self.items() + }, device=device, shape=self.shape, ) diff --git a/torchrl/modules/tensordict_module/exploration.py b/torchrl/modules/tensordict_module/exploration.py index 4d8feaef8b8..271ec60ff4f 100644 --- a/torchrl/modules/tensordict_module/exploration.py +++ b/torchrl/modules/tensordict_module/exploration.py @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - +import warnings from typing import Optional, Union import numpy as np @@ -11,7 +11,11 @@ from tensordict.tensordict import TensorDictBase from tensordict.utils import expand_as_right -from torchrl.data.tensor_specs import CompositeSpec, TensorSpec +from torchrl.data.tensor_specs import ( + CompositeSpec, + TensorSpec, + UnboundedContinuousTensorSpec, +) from torchrl.envs.utils import exploration_mode from torchrl.modules.tensordict_module.common import _forward_hook_safe_action @@ -178,14 +182,19 @@ def __init__( self.register_buffer("std", torch.tensor([std])) self.register_buffer("sigma", torch.tensor([sigma_init])) self.action_key = action_key - self.spec = ( - spec - if spec is not None - else policy.spec - if hasattr(policy, "spec") - else None - ) + self.out_keys = list(self.td_module.out_keys) + if spec is not None: + if not isinstance(spec, CompositeSpec) and len(self.out_keys) >= 1: + spec = CompositeSpec({self.out_keys[0]: spec}) + self._spec = spec + elif hasattr(self.td_module, "_spec"): + self._spec = self.td_module._spec.clone() + else: + self._spec = CompositeSpec({key: None for key in policy.in_keys}) + self.safe = safe + if self.safe: + self.register_forward_hook(_forward_hook_safe_action) def step(self, frames: int = 1) -> None: """A step of sigma decay. 
@@ -340,8 +349,21 @@ def __init__( self.annealing_num_steps = annealing_num_steps self.register_buffer("eps", torch.tensor([eps_init])) self.out_keys = list(self.td_module.out_keys) + self.ou.out_keys + noise_key = self.ou.noise_key + steps_key = self.ou.steps_key + + ou_specs = { + noise_key: None, + steps_key: UnboundedContinuousTensorSpec( + shape=(*self.td_module._spec.shape, 1), + device=self.td_module._spec.device, + dtype=torch.int64, + ), + } self._spec = CompositeSpec( - **self.td_module._spec, **{key: None for key in self.ou.out_keys} + **self.td_module._spec, + **ou_specs, + shape=self.td_module._spec.shape, ) if len(set(self.out_keys)) != len(self.out_keys): raise RuntimeError(f"Got multiple identical output keys: {self.out_keys}") @@ -349,6 +371,10 @@ def __init__( if self.safe: self.register_forward_hook(_forward_hook_safe_action) + @property + def spec(self): + return self._spec + def step(self, frames: int = 1) -> None: """Updates the eps noise factor. @@ -375,6 +401,17 @@ def step(self, frames: int = 1) -> None: def forward(self, tensordict: TensorDictBase) -> TensorDictBase: tensordict = super().forward(tensordict) if exploration_mode() == "random" or exploration_mode() is None: + if "step_count" not in tensordict.keys(): + warnings.warn( + f"The tensordict passed to {self.__class__.__name__} appears to be " + f"missing the 'step_count' entry. This entry is used to " + f"reset the noise at the beginning of a trajectory, without it " + f"the behaviour of this exploration method is undefined. " + f"This is allowed for BC compatibility purposes but it will be deprecated soon! " + f"To create a 'step_count' entry, simply append a StepCounter " + f"transform to your environment with `env = TransformedEnv(env, StepCounter())`." + ) + tensordict.set("step_count", torch.ones(tensordict.shape)) tensordict = self.ou.add_sample(tensordict, self.eps.item()) return tensordict @@ -421,10 +458,13 @@ def noise_key(self): def steps_key(self): return self._steps_key # + str(id(self)) - def _make_noise_pair(self, tensordict: TensorDictBase) -> None: + def _make_noise_pair(self, tensordict: TensorDictBase, is_init=None) -> None: + if is_init is not None: + tensordict = tensordict.get_sub_tensordict(is_init.view(tensordict.shape)) tensordict.set( self.noise_key, torch.zeros(tensordict.get(self.key).shape, device=tensordict.device), + inplace=is_init is not None, ) tensordict.set( self.steps_key, @@ -433,6 +473,7 @@ def _make_noise_pair(self, tensordict: TensorDictBase) -> None: dtype=torch.long, device=tensordict.device, ), + inplace=is_init is not None, ) def add_sample( @@ -441,6 +482,9 @@ def add_sample( if self.noise_key not in tensordict.keys(): self._make_noise_pair(tensordict) + step_count = tensordict.get("step_count", None) + if step_count is not None and not step_count.all(): + self._make_noise_pair(tensordict, step_count == 0) prev_noise = tensordict.get(self.noise_key) prev_noise = prev_noise + self.x0 From 2fe0f82fe54f6f4c5abe866552665efbcd1608b6 Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 30 Mar 2023 16:15:13 +0100 Subject: [PATCH 59/89] amend --- torchrl/modules/tensordict_module/common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/torchrl/modules/tensordict_module/common.py b/torchrl/modules/tensordict_module/common.py index 0b12eaa2e82..0b540c2b55f 100644 --- a/torchrl/modules/tensordict_module/common.py +++ b/torchrl/modules/tensordict_module/common.py @@ -71,7 +71,14 @@ def _forward_hook_safe_action(module, tensordict_in, tensordict_out): 
for _spec, _key in zip(values, keys): if _spec is None: continue - if not _spec.is_in(tensordict_out.get(_key)): + item = tensordict_out.get(_key, None) + if item is None: + # this will happen when an exploration (e.g. OU) writes a key only + # during exploration, but is missing otherwise. + # it's fine since what we want here it to make sure that a key + # is within bounds if it is present + continue + if not _spec.is_in(item): try: tensordict_out.set_( _key, From 91fa500d98bf35517f873667eba6385633f11966 Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 30 Mar 2023 21:00:40 +0100 Subject: [PATCH 60/89] amend --- test/test_exploration.py | 21 +++++ .../modules/tensordict_module/exploration.py | 82 ++++++++++++++----- 2 files changed, 83 insertions(+), 20 deletions(-) diff --git a/test/test_exploration.py b/test/test_exploration.py index 103de211d6e..ef94cc7897d 100644 --- a/test/test_exploration.py +++ b/test/test_exploration.py @@ -29,10 +29,31 @@ from torchrl.modules.tensordict_module.exploration import ( _OrnsteinUhlenbeckProcess, AdditiveGaussianWrapper, + EGreedyWrapper, OrnsteinUhlenbeckProcessWrapper, ) +@pytest.mark.parametrize("eps_init", [0.0, 0.5, 1.0]) +class TestEGreedy: + def test_egreedy(self, eps_init): + torch.manual_seed(0) + spec = BoundedTensorSpec(1, 1, torch.Size([4])) + module = torch.nn.Linear(4, 4, bias=False) + policy = Actor(spec=spec, module=module) + explorative_policy = EGreedyWrapper(policy, eps_init=eps_init, eps_end=eps_init) + td = TensorDict({"observation": torch.zeros(10, 4)}, batch_size=[10]) + action = explorative_policy(td).get("action") + if eps_init == 0: + assert (action == 0).all() + elif eps_init == 1: + assert (action == 1).all() + else: + assert (action == 1).any() + assert (action == 0).any() + assert ((action == 1) | (action == 0)).all() + + @pytest.mark.parametrize("device", get_available_devices()) class TestOrnsteinUhlenbeckProcessWrapper: def test_ou(self, device, seed=0): diff --git a/torchrl/modules/tensordict_module/exploration.py b/torchrl/modules/tensordict_module/exploration.py index 271ec60ff4f..7e573f1cce5 100644 --- a/torchrl/modules/tensordict_module/exploration.py +++ b/torchrl/modules/tensordict_module/exploration.py @@ -58,7 +58,7 @@ class EGreedyWrapper(TensorDictModuleWrapper): >>> print(explorative_policy(td).get("action")) tensor([[ 0.0000, 0.0000, 0.0000, 0.0000], [ 0.0000, 0.0000, 0.0000, 0.0000], - [-0.6986, -0.9366, -0.5837, 0.8596], + [ 0.9055, -0.9277, -0.6295, -0.2532], [ 0.0000, 0.0000, 0.0000, 0.0000], [ 0.0000, 0.0000, 0.0000, 0.0000], [ 0.0000, 0.0000, 0.0000, 0.0000], @@ -86,13 +86,20 @@ def __init__( self.annealing_num_steps = annealing_num_steps self.register_buffer("eps", torch.tensor([eps_init])) self.action_key = action_key - self.spec = ( - spec - if spec is not None - else policy.spec - if hasattr(policy, "spec") - else None - ) + if spec is not None: + if not isinstance(spec, CompositeSpec) and len(self.out_keys) >= 1: + spec = CompositeSpec({action_key: spec}, shape=spec.shape[:-1]) + self._spec = spec + elif hasattr(self.td_module, "_spec"): + self._spec = self.td_module._spec.clone() + if action_key not in self._spec.keys(): + self._spec[action_key] = None + elif hasattr(self.td_module, "spec"): + self._spec = self.td_module.spec.clone() + if action_key not in self._spec.keys(): + self._spec[action_key] = None + else: + self._spec = CompositeSpec({key: None for key in policy.out_keys}) def step(self, frames: int = 1) -> None: """A step of epsilon decay. 
@@ -163,6 +170,7 @@ class AdditiveGaussianWrapper(TensorDictModuleWrapper): def __init__( self, policy: TensorDictModule, + *, sigma_init: float = 1.0, sigma_end: float = 0.1, annealing_num_steps: int = 1000, @@ -183,19 +191,33 @@ def __init__( self.register_buffer("sigma", torch.tensor([sigma_init])) self.action_key = action_key self.out_keys = list(self.td_module.out_keys) + if action_key not in self.out_keys: + raise RuntimeError( + f"The action key {action_key} was not found in the td_module out_keys {self.td_module.out_keys}." + ) if spec is not None: if not isinstance(spec, CompositeSpec) and len(self.out_keys) >= 1: - spec = CompositeSpec({self.out_keys[0]: spec}) + spec = CompositeSpec({action_key: spec}, shape=spec.shape[:-1]) self._spec = spec elif hasattr(self.td_module, "_spec"): self._spec = self.td_module._spec.clone() + if action_key not in self._spec.keys(): + self._spec[action_key] = None + elif hasattr(self.td_module, "spec"): + self._spec = self.td_module.spec.clone() + if action_key not in self._spec.keys(): + self._spec[action_key] = None else: - self._spec = CompositeSpec({key: None for key in policy.in_keys}) + self._spec = CompositeSpec({key: None for key in policy.out_keys}) self.safe = safe if self.safe: self.register_forward_hook(_forward_hook_safe_action) + @property + def spec(self): + return self._spec + def step(self, frames: int = 1) -> None: """A step of sigma decay. @@ -222,8 +244,7 @@ def _add_noise(self, action: torch.Tensor) -> torch.Tensor: ).to(action.device) action = action + noise * sigma spec = self.spec - if isinstance(spec, CompositeSpec): - spec = spec[self.action_key] + spec = spec[self.action_key] if spec is not None: action = spec.project(action) elif self.safe: @@ -283,8 +304,11 @@ class OrnsteinUhlenbeckProcessWrapper(TensorDictModuleWrapper): default: None n_steps_annealing (int): number of steps for the sigma annealing. default: 1000 - key (str): key of the action to be modified. + action_key (str): key of the action to be modified. default: "action" + spec (TensorSpec, optional): if provided, the sampled action will be + projected onto the valid action space once explored. If not provided, + the exploration wrapper will attempt to recover it from the policy. safe (bool): if ``True``, actions that are out of bounds given the action specs will be projected in the space given the :obj:`TensorSpec.project` heuristic. default: True @@ -315,6 +339,7 @@ class OrnsteinUhlenbeckProcessWrapper(TensorDictModuleWrapper): def __init__( self, policy: TensorDictModule, + *, eps_init: float = 1.0, eps_end: float = 0.1, annealing_num_steps: int = 1000, @@ -325,9 +350,16 @@ def __init__( x0: Optional[Union[torch.Tensor, np.ndarray]] = None, sigma_min: Optional[float] = None, n_steps_annealing: int = 1000, - key: str = "action", + action_key: str = "action", + spec: TensorSpec = None, safe: bool = True, + key: str = None, ): + if key is not None: + action_key = key + warnings.warn( + f"the 'key' keyword argument of {type(self)} has been renamed 'action_key'. The 'key' entry will be deprecated soon." 
+ ) super().__init__(policy) self.ou = _OrnsteinUhlenbeckProcess( theta=theta, @@ -337,7 +369,7 @@ def __init__( x0=x0, sigma_min=sigma_min, n_steps_annealing=n_steps_annealing, - key=key, + key=action_key, ) self.register_buffer("eps_init", torch.tensor([eps_init])) self.register_buffer("eps_end", torch.tensor([eps_end])) @@ -360,11 +392,21 @@ def __init__( dtype=torch.int64, ), } - self._spec = CompositeSpec( - **self.td_module._spec, - **ou_specs, - shape=self.td_module._spec.shape, - ) + if spec is not None: + if not isinstance(spec, CompositeSpec) and len(self.out_keys) >= 1: + spec = CompositeSpec({action_key: spec}, shape=spec.shape[:-1]) + self._spec = spec + elif hasattr(self.td_module, "_spec"): + self._spec = self.td_module._spec.clone() + if action_key not in self._spec.keys(): + self._spec[action_key] = None + elif hasattr(self.td_module, "spec"): + self._spec = self.td_module.spec.clone() + if action_key not in self._spec.keys(): + self._spec[action_key] = None + else: + self._spec = CompositeSpec({key: None for key in policy.out_keys}) + self._spec.update(ou_specs) if len(set(self.out_keys)) != len(self.out_keys): raise RuntimeError(f"Got multiple identical output keys: {self.out_keys}") self.safe = safe From c251d1abc8a752b405c42d3d2c2480953bfb8a31 Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 31 Mar 2023 08:40:42 +0100 Subject: [PATCH 61/89] bf --- test/test_collector.py | 2 +- test/test_exploration.py | 19 ++++++++++++++----- torchrl/collectors/collectors.py | 8 ++++---- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/test/test_collector.py b/test/test_collector.py index 07e1d591607..474cc819a60 100644 --- a/test/test_collector.py +++ b/test/test_collector.py @@ -914,9 +914,9 @@ def make_env(): @pytest.mark.parametrize( "collector_class", [ + SyncDataCollector, MultiaSyncDataCollector, MultiSyncDataCollector, - SyncDataCollector, ], ) @pytest.mark.parametrize("init_random_frames", [0, 50]) diff --git a/test/test_exploration.py b/test/test_exploration.py index ef94cc7897d..b19ce9eb3c3 100644 --- a/test/test_exploration.py +++ b/test/test_exploration.py @@ -110,13 +110,20 @@ def test_ou_wrapper(self, device, d_obs=4, d_act=6, batch=32, n_steps=100, seed= tensordict = exploratory_policy(tensordict.clone()) if i == 0: assert (tensordict[exploratory_policy.ou.steps_key] == 1).all() + elif i == n_steps // 2 + 1: + assert ( + tensordict[exploratory_policy.ou.steps_key][: batch // 2] == 1 + ).all() else: - assert not (tensordict[exploratory_policy.ou.steps_key] == 1).all() + assert not (tensordict[exploratory_policy.ou.steps_key] == 1).any() out.append(tensordict.clone()) out_noexp.append(tensordict_noexp.clone()) tensordict.set_("observation", torch.randn(batch, d_obs, device=device)) tensordict["step_count"] += 1 + if i == n_steps // 2: + tensordict["step_count"][: batch // 2] = 0 + out = torch.stack(out, 0) out_noexp = torch.stack(out_noexp, 0) assert (out_noexp.get("action") != out.get("action")).all() @@ -127,12 +134,12 @@ def test_ou_wrapper(self, device, d_obs=4, d_act=6, batch=32, n_steps=100, seed= @pytest.mark.parametrize("probabilistic", [True, False]) def test_collector(self, device, parallel_spec, probabilistic, seed=0): torch.manual_seed(seed) - env = SerialEnv(2, ContinuousActionVecMockEnv) + env = SerialEnv(2, ContinuousActionVecMockEnv, device=device) # the module must work with the action spec of a single env or a serial env if parallel_spec: action_spec = env.action_spec else: - action_spec = ContinuousActionVecMockEnv().action_spec + 
action_spec = ContinuousActionVecMockEnv(device=device).action_spec d_act = action_spec.shape[-1] if probabilistic: net = NormalParamWrapper(nn.LazyLinear(2 * d_act)).to(device) @@ -161,6 +168,7 @@ def test_collector(self, device, parallel_spec, probabilistic, seed=0): policy=exploratory_policy, frames_per_batch=100, total_frames=1000, + device=device, ) for _ in collector: # check that we can run the policy @@ -294,12 +302,12 @@ def test_additivegaussian_wrapper( @pytest.mark.parametrize("parallel_spec", [True, False]) def test_collector(self, device, parallel_spec, seed=0): torch.manual_seed(seed) - env = SerialEnv(2, ContinuousActionVecMockEnv) + env = SerialEnv(2, ContinuousActionVecMockEnv, device=device) # the module must work with the action spec of a single env or a serial env if parallel_spec: action_spec = env.action_spec else: - action_spec = ContinuousActionVecMockEnv().action_spec + action_spec = ContinuousActionVecMockEnv(device=device).action_spec d_act = action_spec.shape[-1] net = NormalParamWrapper(nn.LazyLinear(2 * d_act)).to(device) module = SafeModule( @@ -321,6 +329,7 @@ def test_collector(self, device, parallel_spec, seed=0): policy=exploratory_policy, frames_per_batch=100, total_frames=1000, + device=device, ) for _ in collector: # check that we can run the policy diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index ea311794212..72c0ab71b65 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -580,19 +580,19 @@ def __init__( hasattr(self.policy, "spec") and self.policy.spec is not None and all( - v is not None for v in self.policy.spec.values() + v is not None for v in self.policy.spec.values(True, True) ) # if a spec is None, we don't know anything about it # and set(self.policy.spec.keys(True, True)) == set(self.policy.out_keys) and any( key not in self._tensordict_out.keys(isinstance(key, tuple)) - for key in self.policy.spec + for key in self.policy.spec.keys(True, True) ) ): # if policy spec is non-empty, all the values are not None and the keys # match the out_keys we assume the user has given all relevant information # the policy could have more keys than the env: - for key, spec in self.policy.spec.items(): # this may break for nested keys - if key in self._tensordict_out.keys(): + for key, spec in self.policy.spec.items(True, True): + if key in self._tensordict_out.keys(isinstance(key, tuple)): continue if spec.ndim < self._tensordict_out.ndim: spec = spec.expand(self._tensordict_out.shape) From 495acff88e4e008ec738ddf48493d3f6d231974f Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 31 Mar 2023 11:49:24 +0100 Subject: [PATCH 62/89] bf --- test/test_collector.py | 2 +- test/test_exploration.py | 10 +++++-- torchrl/collectors/collectors.py | 14 ++++++++-- torchrl/modules/models/models.py | 46 ++++++++++++++++++-------------- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/test/test_collector.py b/test/test_collector.py index 474cc819a60..4dc92491fe7 100644 --- a/test/test_collector.py +++ b/test/test_collector.py @@ -920,7 +920,7 @@ def make_env(): ], ) @pytest.mark.parametrize("init_random_frames", [0, 50]) -@pytest.mark.parametrize("explicit_spec", [True, False]) +@pytest.mark.parametrize("explicit_spec", [False, True]) @pytest.mark.parametrize("split_trajs", [True, False]) def test_collector_output_keys( collector_class, init_random_frames, explicit_spec, split_trajs diff --git a/test/test_exploration.py b/test/test_exploration.py index b19ce9eb3c3..7543e172ff1 100644 --- 
a/test/test_exploration.py +++ b/test/test_exploration.py @@ -134,7 +134,10 @@ def test_ou_wrapper(self, device, d_obs=4, d_act=6, batch=32, n_steps=100, seed= @pytest.mark.parametrize("probabilistic", [True, False]) def test_collector(self, device, parallel_spec, probabilistic, seed=0): torch.manual_seed(seed) - env = SerialEnv(2, ContinuousActionVecMockEnv, device=device) + env = SerialEnv( + 2, + ContinuousActionVecMockEnv, + ) # the module must work with the action spec of a single env or a serial env if parallel_spec: action_spec = env.action_spec @@ -302,7 +305,10 @@ def test_additivegaussian_wrapper( @pytest.mark.parametrize("parallel_spec", [True, False]) def test_collector(self, device, parallel_spec, seed=0): torch.manual_seed(seed) - env = SerialEnv(2, ContinuousActionVecMockEnv, device=device) + env = SerialEnv( + 2, + ContinuousActionVecMockEnv, + ) # the module must work with the action spec of a single env or a serial env if parallel_spec: action_spec = env.action_spec diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index 72c0ab71b65..b7762f12d3c 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -602,16 +602,26 @@ def __init__( .expand(*env.batch_size, self.frames_per_batch) .clone() ) - elif hasattr(self.policy, "spec") and self.policy.spec is not None: + elif ( + hasattr(self.policy, "spec") + and self.policy.spec is not None + and all(v is not None for v in self.policy.spec.values(True, True)) + and all( + key in self._tensordict_out.keys(isinstance(key, tuple)) + for key in self.policy.spec.keys(True, True) + ) + ): # reach this if the policy has specs and they match with the fake tensordict self._tensordict_out = ( self._tensordict_out.unsqueeze(-1) .expand(*env.batch_size, self.frames_per_batch) .clone() ) - elif not hasattr(self.policy, "spec") or self.policy.spec is None: + else: # otherwise, we perform a small number of steps with the policy to # determine the relevant keys with which to pre-populate _tensordict_out. + # This is the safest thing to do if the spec has None fields or if there is + # no spec at all. # See #505 for additional context. 
with torch.no_grad(): self._tensordict_out = self._tensordict_out.to(self.device) diff --git a/torchrl/modules/models/models.py b/torchrl/modules/models/models.py index 575c12daa74..494ba536e3d 100644 --- a/torchrl/modules/models/models.py +++ b/torchrl/modules/models/models.py @@ -670,7 +670,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def ddpg_init_last_layer( - last_layer: nn.Module, + module: nn.Sequential, scale: float = 6e-4, device: Optional[DEVICE_TYPING] = None, ) -> None: @@ -680,6 +680,12 @@ def ddpg_init_last_layer( https://arxiv.org/pdf/1509.02971.pdf """ + for last_layer in reversed(module): + if isinstance(last_layer, (nn.Linear, nn.Conv2d)): + break + else: + raise RuntimeError("Could not find a nn.Linear / nn.Conv2d to initialize.") + last_layer.weight.data.copy_( torch.rand_like(last_layer.weight.data, device=device) * scale - scale / 2 ) @@ -767,7 +773,7 @@ def __init__( mlp_net_default_kwargs.update(mlp_net_kwargs) self.convnet = ConvNet(device=device, **conv_net_default_kwargs) self.mlp = MLP(device=device, **mlp_net_default_kwargs) - ddpg_init_last_layer(self.mlp[-1], 6e-4, device=device) + ddpg_init_last_layer(self.mlp, 6e-4, device=device) def forward(self, observation: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: hidden = self.convnet(observation) @@ -816,7 +822,7 @@ def __init__( mlp_net_kwargs = mlp_net_kwargs if mlp_net_kwargs is not None else {} mlp_net_default_kwargs.update(mlp_net_kwargs) self.mlp = MLP(device=device, **mlp_net_default_kwargs) - ddpg_init_last_layer(self.mlp[-1], 6e-3, device=device) + ddpg_init_last_layer(self.mlp, 6e-3, device=device) def forward(self, observation: torch.Tensor) -> torch.Tensor: action = self.mlp(observation) @@ -897,7 +903,7 @@ def __init__( mlp_net_default_kwargs.update(mlp_net_kwargs) self.convnet = ConvNet(device=device, **conv_net_default_kwargs) self.mlp = MLP(device=device, **mlp_net_default_kwargs) - ddpg_init_last_layer(self.mlp[-1], 6e-4, device=device) + ddpg_init_last_layer(self.mlp, 6e-4, device=device) def forward(self, observation: torch.Tensor, action: torch.Tensor) -> torch.Tensor: hidden = torch.cat([self.convnet(observation), action], -1) @@ -917,23 +923,23 @@ class DdpgMlpQNet(nn.Module): Args: mlp_net_kwargs_net1 (dict, optional): kwargs for MLP. Default: { - 'in_features': None, - 'out_features': 400, - 'depth': 0, - 'num_cells': [], - 'activation_class': nn.ELU, - 'bias_last_layer': True, - 'activate_last_layer': True, - } + 'in_features': None, + 'out_features': 400, + 'depth': 0, + 'num_cells': [], + 'activation_class': nn.ELU, + 'bias_last_layer': True, + 'activate_last_layer': True, + } mlp_net_kwargs_net2 Default: { - 'in_features': None, - 'out_features': 1, - 'depth': 1, - 'num_cells': [300, ], - 'activation_class': nn.ELU, - 'bias_last_layer': True, - } + 'in_features': None, + 'out_features': 1, + 'depth': 1, + 'num_cells': [300, ], + 'activation_class': nn.ELU, + 'bias_last_layer': True, + } device (Optional[DEVICE_TYPING]): device to create the module on. 
""" @@ -973,7 +979,7 @@ def __init__( ) mlp2_net_default_kwargs.update(mlp_net_kwargs_net2) self.mlp2 = MLP(device=device, **mlp2_net_default_kwargs) - ddpg_init_last_layer(self.mlp2[-1], 6e-3, device=device) + ddpg_init_last_layer(self.mlp2, 6e-3, device=device) def forward(self, observation: torch.Tensor, action: torch.Tensor) -> torch.Tensor: value = self.mlp2(torch.cat([self.mlp1(observation), action], -1)) From 59da7a2732f040ef342cdd7fd0e4a8d89737c88b Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 31 Mar 2023 12:11:46 +0100 Subject: [PATCH 63/89] amend --- torchrl/collectors/collectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index b7762f12d3c..be81aa9c70c 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -594,7 +594,7 @@ def __init__( for key, spec in self.policy.spec.items(True, True): if key in self._tensordict_out.keys(isinstance(key, tuple)): continue - if spec.ndim < self._tensordict_out.ndim: + if self.policy.spec.ndim < self._tensordict_out.ndim: spec = spec.expand(self._tensordict_out.shape) self._tensordict_out.set(key, spec.zero()) self._tensordict_out = ( From 2498b5fbd1a3623ba0b985363ee37fcd79d51cad Mon Sep 17 00:00:00 2001 From: vmoens Date: Fri, 31 Mar 2023 13:44:29 +0100 Subject: [PATCH 64/89] amend --- torchrl/collectors/collectors.py | 7 +++-- torchrl/data/tensor_specs.py | 54 ++++++++++++++++---------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index be81aa9c70c..dd2505a78b3 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -591,11 +591,12 @@ def __init__( # if policy spec is non-empty, all the values are not None and the keys # match the out_keys we assume the user has given all relevant information # the policy could have more keys than the env: - for key, spec in self.policy.spec.items(True, True): + policy_spec = self.policy.spec + if policy_spec.ndim < self._tensordict_out.ndim: + policy_spec = policy_spec.expand(self._tensordict_out.shape) + for key, spec in policy_spec.items(True, True): if key in self._tensordict_out.keys(isinstance(key, tuple)): continue - if self.policy.spec.ndim < self._tensordict_out.ndim: - spec = spec.expand(self._tensordict_out.shape) self._tensordict_out.set(key, spec.zero()) self._tensordict_out = ( self._tensordict_out.unsqueeze(-1) diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py index 2a3996c8681..73cc015d6e8 100644 --- a/torchrl/data/tensor_specs.py +++ b/torchrl/data/tensor_specs.py @@ -821,12 +821,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." 
) return self.__class__( n=shape[-1], shape=shape, device=self.device, dtype=self.dtype @@ -1065,12 +1065,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." ) return self.__class__( minimum=self.space.minimum.expand(shape).clone(), @@ -1260,12 +1260,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." ) return self.__class__(shape=shape, device=self.device, dtype=self.dtype) @@ -1347,12 +1347,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." ) return self.__class__(shape=shape, device=self.device, dtype=self.dtype) @@ -1537,12 +1537,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." ) return self.__class__( nvec=nvecs, shape=shape, device=self.device, dtype=self.dtype @@ -1676,12 +1676,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." 
) return self.__class__( n=self.space.n, shape=shape, device=self.device, dtype=self.dtype @@ -1762,12 +1762,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." ) return self.__class__( n=shape[-1], shape=shape, device=self.device, dtype=self.dtype @@ -1981,12 +1981,12 @@ def expand(self, *shape): shape = shape[0] if any(val < 0 for val in shape): raise ValueError( - f"{self.__class__.__name__}.extend does not support negative shapes." + f"{self.__class__.__name__}.expand does not support negative shapes." ) if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." ) return self.__class__( nvec=self.nvec, shape=shape, device=self.device, dtype=self.dtype @@ -2501,11 +2501,11 @@ def expand(self, *shape): if len(shape) == 1 and isinstance(shape[0], (tuple, list, torch.Size)): shape = shape[0] if any(val < 0 for val in shape): - raise ValueError("CompositeSpec.extend does not support negative shapes.") + raise ValueError("CompositeSpec.expand does not support negative shapes.") if any(s1 != s2 and s2 != 1 for s1, s2 in zip(shape[-self.ndim :], self.shape)): raise ValueError( - f"The last {self.ndim} of the extended shape must match the" - f"shape of the CompositeSpec in CompositeSpec.extend." + f"The last {self.ndim} of the expanded shape {shape} must match the" + f"shape of the {self.__class__.__name__} spec in expand()." 
) try: device = self.device From f1da0813d0245d1678eebb7d9309e279e45e380d Mon Sep 17 00:00:00 2001 From: vmoens Date: Sun, 2 Apr 2023 08:00:28 +0100 Subject: [PATCH 65/89] stateful functional modules --- torchrl/objectives/common.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 9c37b1cbdca..5bca30cf5d1 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -99,9 +99,8 @@ def convert_to_functional( buffer_names = next(itertools.islice(zip(*module.named_buffers()), 1)) except StopIteration: buffer_names = () - params = make_functional(module, funs_to_decorate=funs_to_decorate) - functional_module = deepcopy(module) - repopulate_module(module, params) + params = make_functional(module, funs_to_decorate=funs_to_decorate, keep_params=True) + functional_module = module params_and_buffers = params # we transform the buffers in params to make sure they follow the device From 14e0a734af1777b9c07509e2dde0fb4a7b319e4c Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 08:00:44 +0100 Subject: [PATCH 66/89] amend --- torchrl/objectives/common.py | 4 +++- torchrl/objectives/utils.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 5bca30cf5d1..106ad36ef62 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -99,7 +99,9 @@ def convert_to_functional( buffer_names = next(itertools.islice(zip(*module.named_buffers()), 1)) except StopIteration: buffer_names = () - params = make_functional(module, funs_to_decorate=funs_to_decorate, keep_params=True) + params = make_functional( + module, funs_to_decorate=funs_to_decorate, keep_params=True + ) functional_module = module params_and_buffers = params diff --git a/torchrl/objectives/utils.py b/torchrl/objectives/utils.py index 8b72f1f6620..3af554935a9 100644 --- a/torchrl/objectives/utils.py +++ b/torchrl/objectives/utils.py @@ -267,7 +267,7 @@ def __init__( ], eps: float = 0.999, ): - if not (eps < 1.0 and eps > 0.0): + if not (eps <= 1.0 and eps >= 0.0): raise ValueError( f"Got eps = {eps} when it was supposed to be between 0 and 1." ) From 94ec94e998d58692efca65116b75dc4981250428 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 16:48:31 +0100 Subject: [PATCH 67/89] amend --- tutorials/sphinx-tutorials/coding_ddpg.py | 726 ++++++++++++---------- tutorials/sphinx-tutorials/coding_dqn.py | 45 +- 2 files changed, 419 insertions(+), 352 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 503a53d48d1..914186f4ed9 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -5,6 +5,7 @@ **Author**: `Vincent Moens `_ """ + ############################################################################## # TorchRL separates the training of RL algorithms in various pieces that will be # assembled in your training script: the environment, the data collection and @@ -47,50 +48,26 @@ # and the library features that are to be used in the context of # this algorithm. 
# -# Imports -# ------- +# Imports and setup +# ----------------- # # sphinx_gallery_start_ignore import warnings from typing import Tuple -from torchrl.objectives import LossModule - warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore -import numpy as np import torch.cuda import tqdm -from matplotlib import pyplot as plt -from tensordict.nn import TensorDictModule -from tensordict.tensordict import TensorDict, TensorDictBase -from torch import nn, optim -from torchrl.collectors import MultiaSyncDataCollector -from torchrl.data import CompositeSpec, TensorDictReplayBuffer -from torchrl.data.replay_buffers.samplers import PrioritizedSampler, RandomSampler -from torchrl.data.replay_buffers.storages import LazyMemmapStorage -from torchrl.envs import ( - CatTensors, - DoubleToFloat, - EnvCreator, - ObservationNorm, - ParallelEnv, -) -from torchrl.envs.libs.dm_control import DMControlEnv -from torchrl.envs.libs.gym import GymEnv -from torchrl.envs.transforms import RewardScaling, TransformedEnv -from torchrl.envs.utils import set_exploration_mode -from torchrl.modules import ( - Actor, - ActorCriticWrapper, - MLP, - OrnsteinUhlenbeckProcessWrapper, - ValueOperator, + + +############################################################################### +# We will execute the policy on cuda if available +device = ( + torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") ) -from torchrl.objectives.utils import distance_loss, SoftUpdate -from torchrl.trainers import Recorder ############################################################################### # torchrl :class:`torchrl.objectives.LossModule` @@ -179,6 +156,8 @@ # Later, we will see how the target parameters should be updated in torchrl. # +from tensordict.nn import TensorDictModule + def _init( self, @@ -190,7 +169,7 @@ def _init( self.convert_to_functional( actor_network, "actor_network", - create_target_params=False, + create_target_params=True, ) self.convert_to_functional( value_network, @@ -283,25 +262,21 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams): # For this, the :func:`torchrl.objectives.utils.hold_out_params` function # can be used. 
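###############################################################################
# Before looking at the implementation, here is a minimal, torch-only sketch
# (an illustration, not part of this patch) of the gradient flow we are after:
# with the value-network parameters detached (or held out), the actor loss
# back-propagates through the action, and hence through the actor weights,
# while leaving the critic untouched. The two tensors below are mere stand-ins
# for the critic parameters and the actor output.

import torch

critic_weight = torch.randn(4, requires_grad=True)  # stand-in for the critic parameters
action = torch.randn(4, requires_grad=True)  # stand-in for the actor output
q_value = (critic_weight.detach() * action).sum()  # a toy "Q(s, a)" with a frozen critic
(-q_value).backward()  # actor loss: maximise the value
assert critic_weight.grad is None  # no gradient reaches the critic...
assert action.grad is not None  # ...but the actor does get one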
-from torchrl.objectives.utils import hold_out_params - def _loss_actor( self, tensordict, ) -> torch.Tensor: - td_copy = tensordict.select(*self.actor_in_keys).detach() + td_copy = tensordict.select(*self.actor_in_keys) # Get an action from the actor network td_copy = self.actor_network( td_copy, - params=self.actor_network_params, ) # get the value associated with that action - with hold_out_params(self.value_network_params) as params: - td_copy = self.value_network( - td_copy, - params=params, - ) + td_copy = self.value_network( + td_copy, + params=self.value_network_params.detach(), + ) return -td_copy.get("state_action_value") @@ -313,12 +288,14 @@ def _loss_actor( # To do this, we will rely on the value estimator of our class: # +from torchrl.objectives.utils import distance_loss + def _loss_value( self, tensordict, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - td_copy = tensordict.detach() + td_copy = tensordict.clone() # V(s, a) self.value_network(td_copy, params=self.value_network_params) @@ -336,11 +313,11 @@ def _loss_value( batch_size=self.target_actor_network_params.batch_size, device=self.target_actor_network_params.device, ) - with set_exploration_mode("mode"): # we make sure that no exploration is performed - target_value = self.value_estimator.value_estimate( - tensordict, target_params=target_params - ).squeeze(-1) + target_value = self.value_estimator.value_estimate( + tensordict, target_params=target_params + ).squeeze(-1) + # Computes the value loss: L2, L1 or smooth L1 depending on self.loss_funtion loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_funtion) td_error = (pred_val - target_value).pow(2) @@ -355,14 +332,10 @@ def _loss_value( # value and actor loss, collect the cost values and write them in a tensordict # delivered to the user. +from tensordict.tensordict import TensorDict, TensorDictBase -def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: - if not input_tensordict.device == self.device: - raise RuntimeError( - f"Got device={input_tensordict.device} but " - f"actor_network.device={self.device} (self.device={self.device})" - ) +def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: loss_value, td_error, pred_val, target_value = self.loss_value( input_tensordict, ) @@ -389,6 +362,9 @@ def _forward(self, input_tensordict: TensorDictBase) -> TensorDict: ) +from torchrl.objectives import LossModule + + class DDPGLoss(LossModule): default_value_estimator = default_value_estimator make_value_estimator = make_value_estimator @@ -439,11 +415,14 @@ class DDPGLoss(LossModule): # with either one of the two backends considered above (dm-control or gym). # +from torchrl.envs.libs.dm_control import DMControlEnv +from torchrl.envs.libs.gym import GymEnv + env_library = None env_name = None -def make_env(): +def make_env(from_pixels=False): """Create a base env.""" global env_library global env_name @@ -462,9 +441,9 @@ def make_env(): env_kwargs = { "device": device, - "frame_skip": frame_skip, "from_pixels": from_pixels, "pixels_only": from_pixels, + "frame_skip": 2, } env = env_library(*env_args, **env_kwargs) return env @@ -499,6 +478,17 @@ def make_env(): # take care of computing the normalizing constants later on. 
# +from torchrl.envs import ( + CatTensors, + DoubleToFloat, + EnvCreator, + ObservationNorm, + ParallelEnv, + RewardScaling, + StepCounter, + TransformedEnv, +) + def make_transformed_env( env, @@ -544,31 +534,9 @@ def make_transformed_env( ) ) - return env + env.append_transform(StepCounter(max_frames_per_traj)) - -############################################################################### -# Normalization of the observations -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# To compute the normalizing statistics, we run an arbitrary number of random -# steps in the environment and compute the mean and standard deviation of the -# collected observations. The :func:`ObservationNorm.init_stats()` method can -# be used for this purpose. To get the summary statistics, we create a dummy -# environment and run it for a given number of steps, collect data over a given -# number of steps and compute its summary statistics. -# - - -def get_env_stats(): - """Gets the stats of an environment.""" - proof_env = make_transformed_env(make_env()) - proof_env.set_seed(seed) - t = proof_env.transform[2] - t.init_stats(init_env_steps) - transform_state_dict = t.state_dict() - proof_env.close() - return transform_state_dict + return env ############################################################################### @@ -599,6 +567,7 @@ def get_env_stats(): def parallel_env_constructor( + env_per_collector, transform_state_dict, ): if env_per_collector == 1: @@ -627,6 +596,82 @@ def make_t_env(): return env +# The backend can be gym or dm_control +backend = "gym" + +############################################################################### +# .. note:: +# ``frame_skip`` batches multiple step together with a single action +# If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to +# be adjusted to have a consistent total number of frames collected across +# experiments. This is important as raising the frame-skip but keeping the +# total number of frames unchanged may seem like cheating: all things compared, +# a dataset of 10M elements collected with a frame-skip of 2 and another with +# a frame-skip of 1 actually have a ratio of interactions with the environment +# of 2:1! In a nutshell, one should be cautious about the frame-count of a +# training script when dealing with frame skipping as this may lead to +# biased comparisons between training strategies. +# + +############################################################################### +# Scaling the reward helps us control the signal magnitude for a more +# efficient learning. +reward_scaling = 5.0 + +############################################################################### +# We also define when a trajectory will be truncated. A thousand steps (500 if +# frame-skip = 2) is a good number to use for cheetah: + +max_frames_per_traj = 500 + +############################################################################### +# Normalization of the observations +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# To compute the normalizing statistics, we run an arbitrary number of random +# steps in the environment and compute the mean and standard deviation of the +# collected observations. The :func:`ObservationNorm.init_stats()` method can +# be used for this purpose. To get the summary statistics, we create a dummy +# environment and run it for a given number of steps, collect data over a given +# number of steps and compute its summary statistics. 
+# + + +def get_env_stats(): + """Gets the stats of an environment.""" + proof_env = make_transformed_env(make_env()) + t = proof_env.transform[2] + t.init_stats(init_env_steps) + transform_state_dict = t.state_dict() + proof_env.close() + return transform_state_dict + + +############################################################################### +# Normalization stats +# ~~~~~~~~~~~~~~~~~~~ +# Number of random steps used as for stats computation using ObservationNorm + +init_env_steps = 5000 + +transform_state_dict = get_env_stats() + +############################################################################### +# Number of environments in each data collector +env_per_collector = 4 + +############################################################################### +# We pass the stats computed earlier to normalize the output of our +# environment: + +parallel_env = parallel_env_constructor( + env_per_collector=env_per_collector, + transform_state_dict=transform_state_dict, +) + + +from torchrl.data import CompositeSpec + ############################################################################### # Building the model # ------------------ @@ -649,6 +694,10 @@ def make_t_env(): # that automatically sets the ``out_keys`` to ``"state_action_value`` for q-value # networks and ``state_value`` for other value networks. # +# TorchRL provides a built-in version of the DDPG networks as presented in the +# original paper. These can be found under :class:`torchrl.modules.DdpgMlpActor` +# and :class:`torchrl.modules.DdpgMlpQNet`. +# # Since we use lazy modules, it is necessary to materialize the lazy modules # before being able to move the policy from device to device and achieve other # operations. Hence, it is good practice to run the modules with a small @@ -656,6 +705,16 @@ def make_t_env(): # environment specs. 
# +from torchrl.modules import ( + ActorCriticWrapper, + DdpgMlpActor, + DdpgMlpQNet, + OrnsteinUhlenbeckProcessWrapper, + ProbabilisticActor, + TanhDelta, + ValueOperator, +) + def make_ddpg_actor( transform_state_dict, @@ -666,35 +725,29 @@ def make_ddpg_actor( proof_environment.transform[2].load_state_dict(transform_state_dict) env_specs = proof_environment.specs - in_features = env_specs["output_spec"]["observation"]["observation_vector"].shape[ - -1 - ] out_features = env_specs["input_spec"]["action"].shape[-1] - actor_net = MLP( - in_features=in_features, - out_features=out_features, - num_cells=[num_cells] * num_layers, - activation_class=nn.Tanh, - activate_last_layer=True, # with this option on, we use a Tanh map as a last layer, thereby constraining the action to the [-1; 1] domain + actor_net = DdpgMlpActor( + action_dim=out_features, ) + in_keys = ["observation_vector"] - out_keys = ["action"] + out_keys = ["param"] - actor = Actor( + actor = TensorDictModule( actor_net, in_keys=in_keys, out_keys=out_keys, + ) + + actor = ProbabilisticActor( + actor, + distribution_class=TanhDelta, + in_keys=["param"], spec=CompositeSpec(action=env_specs["input_spec"]["action"]), ).to(device) - q_net = MLP( - in_features=in_features - + out_features, # receives an action and an observation as input - out_features=1, - num_cells=[num_cells] * num_layers, - activation_class=nn.Tanh, - ) + q_net = DdpgMlpQNet() in_keys = in_keys + ["action"] qnet = ValueOperator( @@ -702,9 +755,113 @@ def make_ddpg_actor( module=q_net, ).to(device) + # init lazy moduless + qnet(actor(proof_environment.reset())) return actor, qnet +actor, qnet = make_ddpg_actor( + transform_state_dict=transform_state_dict, + device=device, +) + +############################################################################### +# Exploration +# ~~~~~~~~~~~ +# +# The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` +# exploration module, as suggesed in the original paper. +# Let's define the number of frames before OU noise reaches its minimum value +annealing_frames = 1_000_000 + +actor_model_explore = OrnsteinUhlenbeckProcessWrapper( + actor, + annealing_num_steps=annealing_frames, +).to(device) +if device == torch.device("cpu"): + actor_model_explore.share_memory() + + +############################################################################### +# Data collector +# -------------- +# +# TorchRL provides specialized classes to help you collect data by executing +# the policy in the environment. These "data collectors" iteratively compute +# the action to be executed at a given time, then execute a step in the +# environment and reset it when required. +# Data collectors are designed to help developers have a tight control +# on the number of frames per batch of data, on the (a)sync nature of this +# collection and on the resources allocated to the data collection (e.g. GPU, +# number of workers etc). +# +# Here we will use +# :class:`torchrl.collectors.MultiaSyncDataCollector`, a data collector that +# will be executed in an async manner (i.e. data will be collected while +# the policy is being optimized). With the :class:`MultiaSyncDataCollector`, +# multiple workers are running rollouts separately. When a batch is asked, it +# is gathered from the first worker that can provide it. 
+# +# The parameters to specify are: +# +# - the list of environment creation functions, +# - the policy, +# - the total number of frames before the collector is considered empty, +# - the maximum number of frames per trajectory (useful for non-terminating +# environments, like dm_control ones). +# .. note:: +# The ``max_frames_per_traj`` passed to the collector will have the effect +# of registering a new :class:`torchrl.envs.StepCounter` transform +# with the environment used for inference. We can achieve the same result +# manually, as we do in this script. +# +# One should also pass: +# +# - the number of frames in each batch collected, +# - the number of random steps executed independently from the policy, +# - the devices used for policy execution +# - the devices used to store data before the data is passed to the main +# process. +# +# The total frames we will use during training should be around 1M. +total_frames = 10_000 # 1_000_000 + +############################################################################### +# The number of frames returned by the collector at each iteration of the outer +# loop is equal to the length of each sub-trajectories times the number of envs +# run in parallel in each collector. +# +# In other words, we expect batches from the collector to have a shape +# ``[env_per_collector, traj_len]`` where +# ``traj_len=frames_per_batch/env_per_collector``: +# +traj_len = 200 +frames_per_batch = env_per_collector * traj_len +init_random_frames = 5000 +num_collectors = 2 + +from torchrl.collectors import MultiaSyncDataCollector + +collector = MultiaSyncDataCollector( + create_env_fn=[ + parallel_env, + ] + * num_collectors, + policy=actor_model_explore, + total_frames=total_frames, + # max_frames_per_traj=max_frames_per_traj, # this is achieved by the env constructor + frames_per_batch=frames_per_batch, + init_random_frames=init_random_frames, + reset_at_each_iter=False, + split_trajs=False, + device=device, + # device for execution + storing_device=device, + # device where data will be stored and passed + update_at_each_batch=False, + exploration_mode="random", +) + ############################################################################### # Evaluator: building your recorder object # ---------------------------------------- @@ -716,25 +873,42 @@ def make_ddpg_actor( # from these simulations. 
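###############################################################################
# Conceptually, this is roughly equivalent to running a plain rollout with
# exploration switched off (a sketch, assuming an evaluation environment
# ``eval_env`` built with the helpers above):
#
# .. code-block:: python
#
#    from torchrl.envs.utils import set_exploration_mode
#
#    with set_exploration_mode("mode"), torch.no_grad():
#        eval_rollout = eval_env.rollout(max_steps=1000, policy=actor)
#    print(eval_rollout[("next", "reward")].sum())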
# # The following helper function builds this object: +from torchrl.trainers import Recorder -def make_recorder(actor_model_explore, transform_state_dict): +def make_recorder(actor_model_explore, transform_state_dict, record_interval): base_env = make_env() - recorder = make_transformed_env(base_env) - recorder.transform[2].init_stats(3) - recorder.transform[2].load_state_dict(transform_state_dict) + environment = make_transformed_env(base_env) + environment.transform[2].init_stats( + 3 + ) # must be instantiated to load the state dict + environment.transform[2].load_state_dict(transform_state_dict) recorder_obj = Recorder( record_frames=1000, - frame_skip=frame_skip, policy_exploration=actor_model_explore, - recorder=recorder, - exploration_mode="mean", + environment=environment, + exploration_mode="mode", record_interval=record_interval, ) return recorder_obj +############################################################################### +# We will be recording the performance every 10 batch collected +record_interval = 10 + +recorder = make_recorder( + actor_model_explore, transform_state_dict, record_interval=record_interval +) + +from torchrl.data.replay_buffers import ( + LazyMemmapStorage, + PrioritizedSampler, + RandomSampler, + TensorDictReplayBuffer, +) + ############################################################################### # Replay buffer # ------------- @@ -750,8 +924,10 @@ def make_recorder(actor_model_explore, transform_state_dict): # hyperparameters: # +from torchrl.envs import RandomCropTensorDict + -def make_replay_buffer(buffer_size, batch_size, prefetch=3): +def make_replay_buffer(buffer_size, batch_size, random_crop_len, prefetch=3, prb=False): if prb: sampler = PrioritizedSampler( max_capacity=buffer_size, @@ -764,176 +940,102 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): storage=LazyMemmapStorage( buffer_size, scratch_dir=buffer_scratch_dir, - device=device, ), batch_size=batch_size, sampler=sampler, pin_memory=False, prefetch=prefetch, + transform=RandomCropTensorDict(random_crop_len, sample_dim=1), ) return replay_buffer ############################################################################### -# Hyperparameters -# --------------- -# -# After having written our helper functions, it is time to set the -# experiment hyperparameters: +# We'll store the replay buffer in a temporary dirrectory on disk -############################################################################### -# Environment -# ~~~~~~~~~~~ +import tempfile -# The backend can be gym or dm_control -backend = "gym" +tmpdir = tempfile.TemporaryDirectory() +buffer_scratch_dir = tmpdir.name ############################################################################### -# .. note:: -# ``frame_skip`` batches multiple step together with a single action -# If > 1, the other frame counts (e.g. frames_per_batch, total_frames) need to -# be adjusted to have a consistent total number of frames collected across -# experiments. This is important as raising the frame-skip but keeping the -# total number of frames unchanged may seem like cheating: all things compared, -# a dataset of 10M elements collected with a frame-skip of 2 and another with -# a frame-skip of 1 actually have a ratio of interactions with the environment -# of 2:1! 
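###############################################################################
# The ``RandomCropTensorDict`` transform used above is worth a quick
# illustration (a sketch with hypothetical sizes, not code from the original
# script): the buffer stores full trajectories, and the transform crops each
# sampled trajectory along the time dimension so that the loss only ever sees
# short windows:
#
# .. code-block:: python
#
#    rb = make_replay_buffer(buffer_size=1000, batch_size=4, random_crop_len=25)
#    rb.extend(batch)        # ``batch`` of trajectories, shape [env_per_collector, traj_len]
#    subtrajs = rb.sample()  # shape [4, 25]: 4 trajectories cropped to 25 steps each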
+# Replay buffer storage and batch size +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -frame_skip = 2 -from_pixels = False - -############################################################################### -# Scaling the reward helps us control the signal magnitude for a more -# efficient learning. -reward_scaling = 5.0 - -############################################################################### -# Number of random steps used as for stats computation using ObservationNorm -init_env_steps = 1000 - -############################################################################### -# Exploration: Number of frames before OU noise becomes null -annealing_frames = 1000000 // frame_skip - -############################################################################### -# Collection -# ~~~~~~~~~~ +# TorchRL replay buffer counts the number of elements along the first dimension. +# Since we'll be feeding trajectories to our buffer, we need to adapt the buffer +# size by dividing it by the length of the sub-trajectories yielded by our +# data collector. +# Regarding the batch-size, our sampling strategy will consist in sampling +# trajectories of length ``traj_len=200`` before selecting sub-trajecotries +# or length ``random_crop_len=25`` on which the loss will be computed. +# This strategy balances the choice of storing whole trajectories of a certain +# length with the need for providing sampels with a sufficient heterogeneity +# to our loss. The following figure shows the dataflow from a collector +# that gets 8 frames in each batch with 2 environments run in parallel, +# feeds them to a replay buffer that contains 1000 trajectories and +# samples sub-trajectories of 2 time steps each. # -# We will execute the policy on cuda if available -device = ( - torch.device("cpu") if torch.cuda.device_count() == 0 else torch.device("cuda:0") -) - -############################################################################### -# Number of environments in each data collector -env_per_collector = 2 - -############################################################################### -# Total frames we will use during training. Scale up to 500K - 1M for a more -# meaningful training -total_frames = 10000 // frame_skip - -############################################################################### -# Number of frames returned by the collector at each iteration of the outer loop. -# We expect batches from the collector to have a shape -# ``[env_per_collector, traj_len]`` where ``traj_len`` is the time dimension -# of the samples. TorchRL's datacollectors are given a certain number of -# environment and a number of frames to deliver in each batch. 
We can -# We can easily calculate how many frames we need to ask to the collectors: -traj_len = 50 # time length of the batches -frames_per_batch = env_per_collector * traj_len // frame_skip -max_frames_per_traj = 1000 // frame_skip -init_random_frames = 0 - -############################################################################### -# We will be recording the performance every 10 batch collected -record_interval = 10 - -############################################################################### -# Optimizer and optimization -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ - -lr = 5e-4 -weight_decay = 0.0 -# UTD: Number of iterations of the inner loop -update_to_data = 4 - -############################################################################### -# Because we'll be sampling from a replay buffer that stores sub-trajectories -# of length ``traj_len``, we need to compute how large the batch-size -# is going to be based on the total number of elements we expect to find -# divided by the trajectory length: -batch_size = 128 // traj_len * frame_skip +# .. figure:: /_static/img/replaybuffer_traj.png +# :alt: Storign trajectories in the replay buffer +# +# Let's start with the number of frames stored in the buffer -############################################################################### -# Model -# ~~~~~ +def ceil_div(x, y): + return -x // (-y) -gamma = 0.99 -lmbda = 0.2 -tau = 0.005 # Decay factor for the target network -# Network specs -num_cells = 64 -num_layers = 2 +buffer_size = 1_000_000 +buffer_size = ceil_div(buffer_size, traj_len) ############################################################################### -# Replay buffer -# ~~~~~~~~~~~~~ -# If ``prb=True``, a Prioritized replay buffer will be used +# Prioritized replay buffer is disabled by default prb = False -############################################################################### -# Number of frames stored in the buffer -buffer_size = min(total_frames, 1_000_000 // traj_len) -buffer_scratch_dir = "/tmp/" - -seed = 0 ############################################################################### -# Initialization -# -------------- -# -# To initialize the experiment, we first acquire the observation statistics, -# then build the networks, wrap them in an exploration wrapper (following the -# seminal DDPG paper, we used an Ornstein-Uhlenbeck process to add noise to the -# sampled actions). - - -# Seeding -torch.manual_seed(seed) -np.random.seed(seed) +# We also need to define how many updates we'll be doing per batch of data +# collected. This is known as the update-to-data or UTD ratio: +update_to_data = 64 ############################################################################### -# Normalization stats -# ~~~~~~~~~~~~~~~~~~~ - -transform_state_dict = get_env_stats() +# We'll be feeding the loss with trajectories of length 25: +random_crop_len = 25 ############################################################################### -# Models: policy and q-value network -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# In the original paper, the authors perform one update with a batch of 64 +# elements for each frame collected. Here, we reproduce the same ratio +# but while realizing several updates at each batch collection. 
We +# adapt our batch-size to achieve the same number of update-per-frame ratio: -actor, qnet = make_ddpg_actor( - transform_state_dict=transform_state_dict, - device=device, -) -if device == torch.device("cpu"): - actor.share_memory() +batch_size = ceil_div(64 * frames_per_batch, update_to_data * random_crop_len) +replay_buffer = make_replay_buffer( + buffer_size=buffer_size, + batch_size=batch_size, + random_crop_len=random_crop_len, + prefetch=3, + prb=prb, +) ############################################################################### -# Loss module -# ~~~~~~~~~~~ +# Loss module construction +# ------------------------ +# # We build our loss module with the actor and qnet we've just created. # Because we have target parameters to update, we _must_ create a target network # updater. # + +gamma = 0.99 +lmbda = 0.9 +tau = 0.001 # Decay factor for the target network + loss_module = DDPGLoss(actor, qnet) + +############################################################################### # let's use the TD(lambda) estimator! loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=gamma, lmbda=lmbda) -target_net_updater = SoftUpdate(loss_module, eps=1 - tau) -target_net_updater.init_() ############################################################################### # .. note:: @@ -945,109 +1047,37 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # practice despite the fact that they introduce some bias in the return # estimates. # -# The policy is wrapped in a :class:`torchrl.modules.OrnsteinUhlenbeckProcessWrapper` -# exploration module: - -actor_model_explore = OrnsteinUhlenbeckProcessWrapper( - actor, - annealing_num_steps=annealing_frames, -).to(device) -if device == torch.device("cpu"): - actor_model_explore.share_memory() - -############################################################################### -# Parallel environment creation -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# We pass the stats computed earlier to normalize the output of our -# environment: - -create_env_fn = parallel_env_constructor( - transform_state_dict=transform_state_dict, -) - -############################################################################### -# Data collector -# ~~~~~~~~~~~~~~ -# -# TorchRL provides specialized classes to help you collect data by executing -# the policy in the environment. These "data collectors" iteratively compute -# the action to be executed at a given time, then execute a step in the -# environment and reset it when required. -# Data collectors are designed to help developers have a tight control -# on the number of frames per batch of data, on the (a)sync nature of this -# collection and on the resources allocated to the data collection (e.g. GPU, -# number of workers etc). -# -# Here we will use -# :class:`torchrl.collectors.MultiaSyncDataCollector`, a data collector that -# will be executed in an async manner (i.e. data will be collected while -# the policy is being optimized). With the :class:`MultiaSyncDataCollector`, -# multiple workers are running rollouts separately. When a batch is asked, it -# is gathered from the first worker that can provide it. -# -# The parameters to specify are: -# -# - the list of environment creation functions, -# - the policy, -# - the total number of frames before the collector is considered empty, -# - the maximum number of frames per trajectory (useful for non-terminating -# environments, like dm_control ones). 
-# -# One should also pass: -# -# - the number of frames in each batch collected, -# - the number of random steps executed independently from the policy, -# - the devices used for policy execution -# - the devices used to store data before the data is passed to the main -# process. +# Target network updater +# ^^^^^^^^^^^^^^^^^^^^^^ # +# Target networks are a crucial part of off-policy RL algorithms. +# Updating the target network parameters is made easy thanks to the +# :class:`torchrl.objectives.HardUpdate` and :class:`torchrl.objectives.SoftUpdate` +# classes. They're built with the loss module as argument, and the update is +# achieved via a call to `updater.step()` at the appropriate location in the +# training loop. -collector = MultiaSyncDataCollector( - create_env_fn=[create_env_fn, create_env_fn], - policy=actor_model_explore, - total_frames=total_frames, - max_frames_per_traj=max_frames_per_traj, - frames_per_batch=frames_per_batch, - init_random_frames=init_random_frames, - reset_at_each_iter=False, - split_trajs=True, - device=device, # device for execution - storing_device=device, # device where data will be stored and passed - update_at_each_batch=False, - exploration_mode="random", -) - -collector.set_seed(seed) - -############################################################################### -# Replay buffer -# ~~~~~~~~~~~~~ -# - -replay_buffer = make_replay_buffer( - buffer_size=buffer_size, batch_size=batch_size, prefetch=3 -) - -############################################################################### -# Recorder -# ~~~~~~~~ +from torchrl.objectives.utils import SoftUpdate -recorder = make_recorder(actor_model_explore, transform_state_dict) +target_net_updater = SoftUpdate(loss_module, eps=1 - tau) +# This class will raise an error if `init_` is not called first. +target_net_updater.init_() ############################################################################### # Optimizer # ~~~~~~~~~ # -# Finally, we will use the Adam optimizer for the policy and value network, -# with the same learning rate for both. 
+# Finally, we will use the Adam optimizer for the policy and value network: -optimizer = optim.Adam(loss_module.parameters(), lr=lr, weight_decay=weight_decay) -total_collection_steps = total_frames // frames_per_batch +from torch import optim -scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer, T_max=total_collection_steps +optimizer_actor = optim.Adam( + loss_module.actor_network_params.values(True, True), lr=1e-4, weight_decay=0.0 +) +optimizer_value = optim.Adam( + loss_module.value_network_params.values(True, True), lr=1e-3, weight_decay=1e-2 ) +total_collection_steps = total_frames // frames_per_batch ############################################################################### # Time to train the policy @@ -1083,16 +1113,27 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): if collected_frames >= init_random_frames: for _ in range(update_to_data): # sample from replay buffer - sampled_tensordict = replay_buffer.sample() + sampled_tensordict = replay_buffer.sample().to(device) # Compute loss loss_dict = loss_module(sampled_tensordict) # optimize - loss_val = loss_dict["loss_actor"] + loss_dict["loss_value"] - loss_val.backward() - optimizer.step() - optimizer.zero_grad() + loss_dict["loss_actor"].backward() + gn1 = torch.nn.utils.clip_grad_norm_( + loss_module.actor_network_params.values(True, True), 10.0 + ) + optimizer_actor.step() + optimizer_actor.zero_grad() + + loss_dict["loss_value"].backward() + gn2 = torch.nn.utils.clip_grad_norm_( + loss_module.value_network_params.values(True, True), 10.0 + ) + optimizer_value.step() + optimizer_value.zero_grad() + + gn = (gn1**2 + gn2**2) ** 0.5 # update priority if prb: @@ -1103,21 +1144,30 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): rewards.append( ( i, - tensordict["next", "reward"].mean().item() / frame_skip, + tensordict["next", "reward"].mean().item(), ) ) td_record = recorder(None) if td_record is not None: rewards_eval.append((i, td_record["r_evaluation"].item())) - if len(rewards_eval): + if len(rewards_eval) and collected_frames >= init_random_frames: + target_value = loss_dict["target_value"].item() + loss_value = loss_dict["loss_value"].item() + loss_actor = loss_dict["loss_actor"].item() + rn = sampled_tensordict["next", "reward"].mean().item() + rs = sampled_tensordict["next", "reward"].std().item() pbar.set_description( - f"reward: {rewards[-1][1]: 4.4f} (r0 = {r0: 4.4f}), reward eval: reward: {rewards_eval[-1][1]: 4.4f}, shape={sampled_tensordict.shape}" + f"reward: {rewards[-1][1]: 4.2f} (r0 = {r0: 4.2f}), " + f"reward eval: reward: {rewards_eval[-1][1]: 4.2f}, " + f"reward normalized={rn :4.2f}/{rs :4.2f}, " + f"grad norm={gn: 4.2f}, " + f"loss_value={loss_value: 4.2f}, " + f"loss_actor={loss_actor: 4.2f}, " + f"target value: {target_value: 4.2f}" ) # update the exploration strategy actor_model_explore.step(current_frames) - if collected_frames >= init_random_frames: - scheduler.step() collector.shutdown() del collector @@ -1132,6 +1182,8 @@ def make_replay_buffer(buffer_size, batch_size, prefetch=3): # **Note**: As already mentioned above, to get a more reasonable performance, # use a greater value for ``total_frames`` e.g. 1M. 
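###############################################################################
# The raw training rewards can be noisy; if desired, a simple moving average
# (a sketch, not part of the original training script) makes the trend easier
# to read before plotting:
#
# .. code-block:: python
#
#    import numpy as np
#
#    steps, values = zip(*rewards)
#    kernel = np.ones(10) / 10
#    smoothed = np.convolve(values, kernel, mode="valid")
#    plt.plot(steps[: len(smoothed)], smoothed, label="training (smoothed)")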
+from matplotlib import pyplot as plt + plt.figure() plt.plot(*zip(*rewards), label="training") plt.plot(*zip(*rewards_eval), label="eval") diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 6abdae9d60d..b82d4a9ab78 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -41,8 +41,7 @@ # estimated return; # - how to collect data from your environment efficiently and store them # in a replay buffer; -# - how to store trajectories (and not transitions) in your replay buffer), -# and how to estimate returns using TD(lambda); +# - how to use multi-step, a simple preprocessing step for off-policy algorithms; # - and finally how to evaluate your model. # # **Prerequisites**: We encourage you to get familiar with torchrl through the @@ -57,7 +56,7 @@ # On a high level, the algorithm is quite simple: Q-learning consists in # learning a table of state-action values in such a way that, when # encountering any particular state, we know which action to pick just by -# searching for the action with the highest value. This simple setting +# searching for the one with the highest value. This simple setting # requires the actions and states to be # discrete, otherwise a lookup table cannot be built. # @@ -84,21 +83,18 @@ # of this algorithm. # sphinx_gallery_start_ignore -import os -import uuid import warnings -from torchrl.objectives import DQNLoss, SoftUpdate -from torchrl.record.loggers.csv import CSVLogger -from torchrl.trainers import Recorder, ReplayBufferTrainer, Trainer, UpdateWeights - warnings.filterwarnings("ignore") # sphinx_gallery_end_ignore +import os +import uuid + import torch from torch import nn from torchrl.collectors import MultiaSyncDataCollector -from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer +from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter from torchrl.envs.libs.gym import GymEnv from torchrl.envs.transforms import ( @@ -112,6 +108,16 @@ ) from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor +from torchrl.objectives import DQNLoss, SoftUpdate +from torchrl.record.loggers.csv import CSVLogger +from torchrl.trainers import ( + LogReward, + Recorder, + ReplayBufferTrainer, + Trainer, + UpdateWeights, +) + def is_notebook() -> bool: try: @@ -244,7 +250,7 @@ def get_norm_stats(): obs_norm_sd = test_env.transform[-1].state_dict() # let's check that normalizing constants have a size of ``[C, 1, 1]`` where # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). - print(obs_norm_sd) + print("state dict of the observation norm:", obs_norm_sd) return obs_norm_sd @@ -392,6 +398,7 @@ def get_collector( device=device, storing_device=device, split_trajs=False, + postproc=MultiStep(5), ) return data_collector @@ -448,8 +455,6 @@ def get_loss_module(actor, gamma): ############################################################################### # DQN parameters # ~~~~~~~~~~~~~~ - -############################################################################### # gamma decay factor gamma = 0.99 @@ -459,9 +464,9 @@ def get_loss_module(actor, gamma): ############################################################################### # Smooth target network update decay parameter. 
-# This loosely corresponds to a 1/(1-tau) interval with hard target network +# This loosely corresponds to a 1/tau interval with hard target network # update -tau = 0.005 +tau = 0.02 ############################################################################### # Data collection and replay buffer @@ -595,6 +600,7 @@ def get_loss_module(actor, gamma): exploration_mode="mode", log_keys=[("next", "reward")], out_keys={("next", "reward"): "rewards"}, + log_pbar=True, ) recorder.register(trainer) @@ -609,6 +615,15 @@ def get_loss_module(actor, gamma): # trainer.register_op("post_optim", target_net_updater.step) +############################################################################### +# We can log the training rewards too. Note that this is of limited interest +# with CartPole, as rewards are always 1. The discounted sum of rewards is miximised +# not by getting higher rewards but by keeping the cart-pole alive for longer. +# This will be reflected by the `total_rewards` value displayed in the progress bar. +# +log_reward = LogReward() +log_reward.register(trainer) + ############################################################################### # .. note:: # It is possible to link multiple optimizers to the trainer if needed. From 3f16a495e4dc9143c02736ac2e31cd9c4a18b11d Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 16:56:11 +0100 Subject: [PATCH 68/89] amend --- torchrl/objectives/common.py | 3 +-- torchrl/trainers/trainers.py | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 106ad36ef62..fca524eaa96 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -6,12 +6,11 @@ from __future__ import annotations import itertools -from copy import deepcopy from typing import Iterator, List, Optional, Tuple, Union import torch -from tensordict.nn import make_functional, repopulate_module, TensorDictModule +from tensordict.nn import make_functional, TensorDictModule from tensordict.tensordict import TensorDictBase from torch import nn, Tensor diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index ce3516f55db..3cf06a4da60 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -22,6 +22,7 @@ from torchrl._utils import _CKPT_BACKEND, KeyDependentDefaultDict, VERBOSE from torchrl.collectors.collectors import DataCollectorBase +from torchrl.collectors.utils import split_trajectories from torchrl.data import TensorDictPrioritizedReplayBuffer, TensorDictReplayBuffer from torchrl.data.utils import DEVICE_TYPING from torchrl.envs.common import EnvBase @@ -1198,6 +1199,7 @@ def __call__(self, batch: TensorDictBase) -> Dict: auto_cast_to_device=True, break_when_any_done=False, ).clone() + td_record = split_trajectories(td_record) if isinstance(self.policy_exploration, torch.nn.Module): self.policy_exploration.train() self.environment.train() @@ -1207,8 +1209,9 @@ def __call__(self, batch: TensorDictBase) -> Dict: for key in self.log_keys: value = td_record.get(key).float() if key == ("next", "reward"): - mean_value = value.mean() / self.frame_skip - total_value = value.sum() + mask = td_record["mask"] + mean_value = value[mask].mean() / self.frame_skip + total_value = value.sum(dim=td_record.ndim).mean() out[self.out_keys[key]] = mean_value out["total_" + self.out_keys[key]] = total_value continue From 833bf5865be2475d7b56388e53b3241ac1addd10 Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 17:26:31 +0100 Subject: [PATCH 
69/89] revert --- torchrl/objectives/common.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index fca524eaa96..be931e8c260 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -6,11 +6,12 @@ from __future__ import annotations import itertools +from copy import deepcopy from typing import Iterator, List, Optional, Tuple, Union import torch -from tensordict.nn import make_functional, TensorDictModule +from tensordict.nn import make_functional, repopulate_module, TensorDictModule from tensordict.tensordict import TensorDictBase from torch import nn, Tensor @@ -98,10 +99,13 @@ def convert_to_functional( buffer_names = next(itertools.islice(zip(*module.named_buffers()), 1)) except StopIteration: buffer_names = () - params = make_functional( - module, funs_to_decorate=funs_to_decorate, keep_params=True - ) - functional_module = module + params = make_functional(module, funs_to_decorate=funs_to_decorate) + functional_module = deepcopy(module) + repopulate_module(module, params) + # params = make_functional( + # module, funs_to_decorate=funs_to_decorate, keep_params=True + # ) + # functional_module = module params_and_buffers = params # we transform the buffers in params to make sure they follow the device From 094d49b2d63d7e3a634bf67c6cc5ea6e3c832a9c Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 17:56:02 +0100 Subject: [PATCH 70/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index b82d4a9ab78..34209e75b6e 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -398,7 +398,7 @@ def get_collector( device=device, storing_device=device, split_trajs=False, - postproc=MultiStep(5), + postproc=MultiStep(gamma=gamma, n_steps=5), ) return data_collector @@ -458,10 +458,6 @@ def get_loss_module(actor, gamma): # gamma decay factor gamma = 0.99 -############################################################################### -# lambda decay factor (see second the part with TD(:math:`\lambda`) -lmbda = 0.95 - ############################################################################### # Smooth target network update decay parameter. 
# This loosely corresponds to a 1/tau interval with hard target network From effa4fc0a14994f5d463ae93ac1880d82c8d29db Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 18:39:13 +0100 Subject: [PATCH 71/89] log_dir --- tutorials/sphinx-tutorials/coding_dqn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 34209e75b6e..458700a33d8 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -661,7 +661,7 @@ def print_csv_files_in_folder(folder_path): return output_str -print_csv_files_in_folder("csv_logs/" + exp_name) +print_csv_files_in_folder(logger.experiment.log_dir) ############################################################################### # Conclusion and possible improvements From 4afd785e04c1275209ed1c28445831145dce608f Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 21:22:53 +0100 Subject: [PATCH 72/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 458700a33d8..acc63f36ca1 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -582,7 +582,7 @@ def get_loss_module(actor, gamma): # can be cumbersome to implement. buffer_hook = ReplayBufferTrainer( get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), - flatten_tensordicts=False, + flatten_tensordicts=True, ) buffer_hook.register(trainer) weight_updater = UpdateWeights(collector, update_weights_interval=1) From e50f57809665a20bf37e7b5ce97511eab96ca77c Mon Sep 17 00:00:00 2001 From: vmoens Date: Mon, 3 Apr 2023 21:52:07 +0100 Subject: [PATCH 73/89] amend --- torchrl/trainers/trainers.py | 2 +- tutorials/sphinx-tutorials/coding_dqn.py | 1378 +++++++++++----------- 2 files changed, 686 insertions(+), 694 deletions(-) diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 3cf06a4da60..04aef9d0aa2 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -1211,7 +1211,7 @@ def __call__(self, batch: TensorDictBase) -> Dict: if key == ("next", "reward"): mask = td_record["mask"] mean_value = value[mask].mean() / self.frame_skip - total_value = value.sum(dim=td_record.ndim).mean() + total_value = value.sum(dim=td_record.ndim - 1).mean() out[self.out_keys[key]] = mean_value out["total_" + self.out_keys[key]] = total_value continue diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index acc63f36ca1..956721e10b7 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -1,704 +1,696 @@ -# -*- coding: utf-8 -*- -""" -TorchRL trainer: A DQN example -============================== -**Author**: `Vincent Moens `_ - -""" - -############################################################################## -# TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle -# your training loop. The trainer executes a nested loop where the outer loop -# is the data collection and the inner loop consumes this data or some data -# retrieved from the replay buffer to train the model. -# At various points in this training loop, hooks can be attached and executed at -# given intervals. -# -# In this tutorial, we will be using the trainer class to train a DQN algorithm -# to solve the CartPole task from scratch. 
-# -# Main takeaways: -# -# - Building a trainer with its essential components: data collector, loss -# module, replay buffer and optimizer. -# - Adding hooks to a trainer, such as loggers, target network updaters and such. -# -# The trainer is fully customisable and offers a large set of functionalities. -# The tutorial is organised around its construction. -# We will be detailing how to build each of the components of the library first, -# and then put the pieces together using the :class:`torchrl.trainers.Trainer` -# class. -# -# Along the road, we will also focus on some other aspects of the library: -# -# - how to build an environment in TorchRL, including transforms (e.g. data -# normalization, frame concatenation, resizing and turning to grayscale) -# and parallel execution. Unlike what we did in the -# `DDPG tutorial `_, we -# will normalize the pixels and not the state vector. -# - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor -# that estimates the action values and picks up the action with the highest -# estimated return; -# - how to collect data from your environment efficiently and store them -# in a replay buffer; -# - how to use multi-step, a simple preprocessing step for off-policy algorithms; -# - and finally how to evaluate your model. -# -# **Prerequisites**: We encourage you to get familiar with torchrl through the -# `PPO tutorial `_ first. -# -# DQN -# --- -# -# DQN (`Deep Q-Learning `_) was -# the founding work in deep reinforcement learning. -# -# On a high level, the algorithm is quite simple: Q-learning consists in -# learning a table of state-action values in such a way that, when -# encountering any particular state, we know which action to pick just by -# searching for the one with the highest value. This simple setting -# requires the actions and states to be -# discrete, otherwise a lookup table cannot be built. -# -# DQN uses a neural network that encodes a map from the state-action space to -# a value (scalar) space, which amortizes the cost of storing and exploring all -# the possible state-action combinations: if a state has not been seen in the -# past, we can still pass it in conjunction with the various actions available -# through our neural network and get an interpolated value for each of the -# actions available. -# -# We will solve the classic control problem of the cart pole. From the -# Gymnasium doc from where this environment is retrieved: -# -# | A pole is attached by an un-actuated joint to a cart, which moves along a -# | frictionless track. The pendulum is placed upright on the cart and the goal -# | is to balance the pole by applying forces in the left and right direction -# | on the cart. -# -# .. figure:: /_static/img/cartpole_demo.gif -# :alt: Cart Pole -# -# We do not aim at giving a SOTA implementation of the algorithm, but rather -# to provide a high-level illustration of TorchRL features in the context -# of this algorithm. 
- -# sphinx_gallery_start_ignore -import warnings - -warnings.filterwarnings("ignore") -# sphinx_gallery_end_ignore - -import os -import uuid - -import torch -from torch import nn -from torchrl.collectors import MultiaSyncDataCollector -from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer -from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter -from torchrl.envs.libs.gym import GymEnv -from torchrl.envs.transforms import ( - CatFrames, - Compose, - GrayScale, - ObservationNorm, - Resize, - ToTensorImage, - TransformedEnv, -) -from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor - -from torchrl.objectives import DQNLoss, SoftUpdate -from torchrl.record.loggers.csv import CSVLogger -from torchrl.trainers import ( - LogReward, - Recorder, - ReplayBufferTrainer, - Trainer, - UpdateWeights, -) - - -def is_notebook() -> bool: - try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython +if __name__ == "__main__": + # -*- coding: utf-8 -*- + """ + TorchRL trainer: A DQN example + ============================== + **Author**: `Vincent Moens `_ + + """ + + ############################################################################## + # TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle + # your training loop. The trainer executes a nested loop where the outer loop + # is the data collection and the inner loop consumes this data or some data + # retrieved from the replay buffer to train the model. + # At various points in this training loop, hooks can be attached and executed at + # given intervals. + # + # In this tutorial, we will be using the trainer class to train a DQN algorithm + # to solve the CartPole task from scratch. + # + # Main takeaways: + # + # - Building a trainer with its essential components: data collector, loss + # module, replay buffer and optimizer. + # - Adding hooks to a trainer, such as loggers, target network updaters and such. + # + # The trainer is fully customisable and offers a large set of functionalities. + # The tutorial is organised around its construction. + # We will be detailing how to build each of the components of the library first, + # and then put the pieces together using the :class:`torchrl.trainers.Trainer` + # class. + # + # Along the road, we will also focus on some other aspects of the library: + # + # - how to build an environment in TorchRL, including transforms (e.g. data + # normalization, frame concatenation, resizing and turning to grayscale) + # and parallel execution. Unlike what we did in the + # `DDPG tutorial `_, we + # will normalize the pixels and not the state vector. + # - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor + # that estimates the action values and picks up the action with the highest + # estimated return; + # - how to collect data from your environment efficiently and store them + # in a replay buffer; + # - how to use multi-step, a simple preprocessing step for off-policy algorithms; + # - and finally how to evaluate your model. + # + # **Prerequisites**: We encourage you to get familiar with torchrl through the + # `PPO tutorial `_ first. + # + # DQN + # --- + # + # DQN (`Deep Q-Learning `_) was + # the founding work in deep reinforcement learning. 
+ # + # On a high level, the algorithm is quite simple: Q-learning consists in + # learning a table of state-action values in such a way that, when + # encountering any particular state, we know which action to pick just by + # searching for the one with the highest value. This simple setting + # requires the actions and states to be + # discrete, otherwise a lookup table cannot be built. + # + # DQN uses a neural network that encodes a map from the state-action space to + # a value (scalar) space, which amortizes the cost of storing and exploring all + # the possible state-action combinations: if a state has not been seen in the + # past, we can still pass it in conjunction with the various actions available + # through our neural network and get an interpolated value for each of the + # actions available. + # + # We will solve the classic control problem of the cart pole. From the + # Gymnasium doc from where this environment is retrieved: + # + # | A pole is attached by an un-actuated joint to a cart, which moves along a + # | frictionless track. The pendulum is placed upright on the cart and the goal + # | is to balance the pole by applying forces in the left and right direction + # | on the cart. + # + # .. figure:: /_static/img/cartpole_demo.gif + # :alt: Cart Pole + # + # We do not aim at giving a SOTA implementation of the algorithm, but rather + # to provide a high-level illustration of TorchRL features in the context + # of this algorithm. + + # sphinx_gallery_start_ignore + import warnings + + warnings.filterwarnings("ignore") + # sphinx_gallery_end_ignore + + import os + import uuid + + import torch + from torch import nn + from torchrl.collectors import MultiaSyncDataCollector + from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer + from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter + from torchrl.envs.libs.gym import GymEnv + from torchrl.envs.transforms import ( + CatFrames, + Compose, + GrayScale, + ObservationNorm, + Resize, + ToTensorImage, + TransformedEnv, + ) + from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor + + from torchrl.objectives import DQNLoss, SoftUpdate + from torchrl.record.loggers.csv import CSVLogger + from torchrl.trainers import ( + LogReward, + Recorder, + ReplayBufferTrainer, + Trainer, + UpdateWeights, + ) + + def is_notebook() -> bool: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return False # Terminal running IPython + else: + return False # Other type (?) + except NameError: + return False # Probably standard Python interpreter + + ############################################################################### + # Let's get started with the various pieces we need for our algorithm: + # + # - An environment; + # - A policy (and related modules that we group under the "model" umbrella); + # - A data collector, which makes the policy play in the environment and + # delivers training data; + # - A replay buffer to store the training data; + # - A loss module, which computes the objective function to train our policy + # to maximise the return; + # - An optimizer, which performs parameter updates based on our loss. + # + # Additional modules include a logger, a recorder (executes the policy in + # "eval" mode) and a target network updater. 
With all these components into + # place, it is easy to see how one could misplace or misuse one component in + # the training script. The trainer is there to orchestrate everything for you! + # + # Building the environment + # ------------------------ + # + # First let's write a helper function that will output an environment. As usual, + # the "raw" environment may be too simple to be used in practice and we'll need + # some data transformation to expose its output to the policy. + # + # We will be using five transforms: + # + # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; + # - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 + # tensor in a floating point tensor in the ``[0, 1]`` space with shape + # ``[C, W, H]``; + # - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; + # - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; + # - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; + # - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of + # successive frames (``N=4``) in a single tensor along the channel dimension. + # This is useful as a single image does not carry information about the + # motion of the cartpole. Some memory about past observations and actions + # is needed, either via a recurrent neural network or using a stack of + # frames. + # - :class:`torchrl.envs.ObservationNorm` which will normalize our observations + # given some custom summary statistics. + # + # In practice, our environment builder has two arguments: + # + # - ``parallel``: determines whether multiple environments have to be run in + # parallel. We stack the transforms after the + # :class:`torchrl.envs.ParallelEnv` to take advantage + # of vectorization of the operations on device, although this would + # technically work with every single environment attached to its own set of + # transforms. + # - ``obs_norm_sd`` will contain the normalizing constants for + # the :class:`torchrl.envs.ObservationNorm` transform. + # + + def make_env( + parallel=False, + obs_norm_sd=None, + ): + if obs_norm_sd is None: + obs_norm_sd = {"standard_normal": True} + if parallel: + base_env = ParallelEnv( + num_workers, + EnvCreator( + lambda: GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, + ) + ), + ) else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter - - -############################################################################### -# Let's get started with the various pieces we need for our algorithm: -# -# - An environment; -# - A policy (and related modules that we group under the "model" umbrella); -# - A data collector, which makes the policy play in the environment and -# delivers training data; -# - A replay buffer to store the training data; -# - A loss module, which computes the objective function to train our policy -# to maximise the return; -# - An optimizer, which performs parameter updates based on our loss. -# -# Additional modules include a logger, a recorder (executes the policy in -# "eval" mode) and a target network updater. With all these components into -# place, it is easy to see how one could misplace or misuse one component in -# the training script. The trainer is there to orchestrate everything for you! -# -# Building the environment -# ------------------------ -# -# First let's write a helper function that will output an environment. 
As usual, -# the "raw" environment may be too simple to be used in practice and we'll need -# some data transformation to expose its output to the policy. -# -# We will be using five transforms: -# -# - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; -# - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 -# tensor in a floating point tensor in the ``[0, 1]`` space with shape -# ``[C, W, H]``; -# - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; -# - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; -# - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; -# - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of -# successive frames (``N=4``) in a single tensor along the channel dimension. -# This is useful as a single image does not carry information about the -# motion of the cartpole. Some memory about past observations and actions -# is needed, either via a recurrent neural network or using a stack of -# frames. -# - :class:`torchrl.envs.ObservationNorm` which will normalize our observations -# given some custom summary statistics. -# -# In practice, our environment builder has two arguments: -# -# - ``parallel``: determines whether multiple environments have to be run in -# parallel. We stack the transforms after the -# :class:`torchrl.envs.ParallelEnv` to take advantage -# of vectorization of the operations on device, although this would -# technically work with every single environment attached to its own set of -# transforms. -# - ``obs_norm_sd`` will contain the normalizing constants for -# the :class:`torchrl.envs.ObservationNorm` transform. -# - - -def make_env( - parallel=False, - obs_norm_sd=None, -): - if obs_norm_sd is None: - obs_norm_sd = {"standard_normal": True} - if parallel: - base_env = ParallelEnv( - num_workers, - EnvCreator( - lambda: GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, - ) + base_env = GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, + ) + + env = TransformedEnv( + base_env, + Compose( + StepCounter(), # to count the steps of each trajectory + ToTensorImage(), + RewardScaling(loc=0.0, scale=0.1), + GrayScale(), + Resize(64, 64), + CatFrames(4, in_keys=["pixels"], dim=-3), + ObservationNorm(in_keys=["pixels"], **obs_norm_sd), ), ) - else: - base_env = GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, + return env + + ############################################################################### + # Compute normalizing constants + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # To normalize images, we don't want to normalize each pixel independently + # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` + # shaped set of normalizing constants (loc and scale parameters). + # We will be using the ``reduce_dim`` argument + # of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which + # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that + # not all dimensions disappear in the process: + # + + def get_norm_stats(): + test_env = make_env() + test_env.transform[-1].init_stats( + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) + ) + obs_norm_sd = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). 
+ print("state dict of the observation norm:", obs_norm_sd) + return obs_norm_sd + + ############################################################################### + # Building the model (Deep Q-network) + # ----------------------------------- + # + # The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` + # object which is a simple CNN followed by a two-layer MLP. The only trick used + # here is that the action values (i.e. left and right action value) are + # computed using + # + # .. math:: + # + # val = b(obs) + v(obs) - \mathbb{E}[v(obs)] + # + # where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a + # :math:`\# obs \rightarrow num_actions` function. + # + # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, + # which will read the state-action + # values, pick up the one with the maximum value and write all those results + # in the input :class:`tensordict.TensorDict`. + # + + def make_model(dummy_env): + cnn_kwargs = { + "num_cells": [32, 64, 64], + "kernel_sizes": [6, 4, 3], + "strides": [2, 2, 1], + "activation_class": nn.ELU, + # This can be used to reduce the size of the last layer of the CNN + # "squeeze_output": True, + # "aggregator_class": nn.AdaptiveAvgPool2d, + # "aggregator_kwargs": {"output_size": (1, 1)}, + } + mlp_kwargs = { + "depth": 2, + "num_cells": [ + 64, + 64, + ], + "activation_class": nn.ELU, + } + net = DuelingCnnDQNet( + dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs + ).to(device) + net.value[-1].bias.data.fill_(init_bias) + + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to( + device + ) + # init actor: because the model is composed of lazy conv/linear layers, + # we must pass a fake batch of data through it to instantiate them. + tensordict = dummy_env.fake_tensordict() + actor(tensordict) + + # we wrap our actor in an EGreedyWrapper for data collection + actor_explore = EGreedyWrapper( + actor, + annealing_num_steps=total_frames, + eps_init=eps_greedy_val, + eps_end=eps_greedy_val_env, ) - env = TransformedEnv( - base_env, - Compose( - StepCounter(), # to count the steps of each trajectory - ToTensorImage(), - RewardScaling(loc=0.0, scale=0.1), - GrayScale(), - Resize(64, 64), - CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **obs_norm_sd), - ), - ) - return env - - -############################################################################### -# Compute normalizing constants -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# To normalize images, we don't want to normalize each pixel independently -# with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` -# shaped set of normalizing constants (loc and scale parameters). -# We will be using the ``reduce_dim`` argument -# of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which -# dimensions must be reduced, and the ``keep_dims`` parameter to ensure that -# not all dimensions disappear in the process: -# - - -def get_norm_stats(): - test_env = make_env() - test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) + return actor, actor_explore + + ############################################################################### + # Collecting and storing data + # --------------------------- + # + # Replay buffers + # ~~~~~~~~~~~~~~ + # + # Replay buffers play a central role in off-policy RL algorithms such as DQN. + # They constitute the dataset we will be sampling from during training. 
+ # + # Here, we will use a regular sampling strategy, although a prioritized RB + # could improve the performance significantly. + # + # We place the storage on disk using + # :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This + # storage is created in a lazy manner: it will only be instantiated once the + # first batch of data is passed to it. + # + # The only requirement of this storage is that the data passed to it at write + # time must always have the same shape. + + def get_replay_buffer(buffer_size, n_optim, batch_size): + replay_buffer = TensorDictReplayBuffer( + batch_size=batch_size, + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, + ) + return replay_buffer + + ############################################################################### + # Data collector + # ~~~~~~~~~~~~~~ + # + # As in `PPO ` and + # `DDPG `, we will be using + # a data collector as a dataloader in the outer loop. + # + # We choose the following configuration: we will be running a series of + # parallel environments synchronously in parallel in different collectors, + # themselves running in parallel but asynchronously. + # The advantage of this configuration is that we can balance the amount of + # compute that is executed in batch with what we want to be executed + # asynchronously. We encourage the reader to experiment how the collection + # speed is impacted by modifying the number of collectors (ie the number of + # environment constructors passed to the collector) and the number of + # environment executed in parallel in each collector (controlled by the + # ``num_workers`` hyperparameter). + # + # When building the collector, we can choose on which device we want the + # environment and policy to execute the operations through the ``device`` + # keyword argument. The ``storing_devices`` argument will modify the + # location of the data being collected: if the batches that we are gathering + # have a considerable size, we may want to store them on a different location + # than the device where the computation is happening. For asynchronous data + # collectors such as ours, different storing devices mean that the data that + # we collect won't sit on the same device each time, which is something that + # out training loop must account for. For simplicity, we set the devices to + # the same value for all sub-collectors. + + def get_collector( + obs_norm_sd, + num_collectors, + actor_explore, + frames_per_batch, + total_frames, + device, + ): + data_collector = MultiaSyncDataCollector( + [ + make_env(parallel=True, obs_norm_sd=obs_norm_sd), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, + postproc=MultiStep(gamma=gamma, n_steps=5), + ) + return data_collector + + ############################################################################### + # Loss function + # ------------- + # + # Building our loss function is straightforward: we only need to provide + # the model and a bunch of hyperparameters to the DQNLoss class. + # + # Target parameters + # ~~~~~~~~~~~~~~~~~ + # + # Many off-policy RL algorithms use the concept of "target parameters" when it + # comes to estimate the value of the next state or state-action pair. 
+    # The target parameters are lagged copies of the model parameters. Because
+    # their predictions mismatch those of the current model configuration, they
+    # help learning by putting a pessimistic bound on the value being estimated.
+    # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous
+    # in similar algorithms.
+    #
+
+    def get_loss_module(actor, gamma):
+        loss_module = DQNLoss(actor, gamma=gamma, delay_value=True)
+        target_updater = SoftUpdate(loss_module)
+        return loss_module, target_updater
+
+    ###############################################################################
+    # Hyperparameters
+    # ---------------
+    #
+    # Let's start with our hyperparameters. The following settings should work well
+    # in practice, and the performance of the algorithm should hopefully not be
+    # too sensitive to slight variations of these.
+
+    device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu"
+
+    ###############################################################################
+    # Optimizer
+    # ~~~~~~~~~
+
+    # the learning rate of the optimizer
+    lr = 2e-3
+    # weight decay
+    wd = 1e-5
+    # the beta parameters of Adam
+    betas = (0.9, 0.999)
+    # Optimization steps per batch collected (aka UPD or updates per data)
+    n_optim = 8
+
+    ###############################################################################
+    # DQN parameters
+    # ~~~~~~~~~~~~~~
+    # the discount factor gamma
+    gamma = 0.99
+
+    ###############################################################################
+    # Smooth target network update decay parameter.
+    # This loosely corresponds to a hard target network update every 1/tau
+    # optimization steps.
+    tau = 0.02
+
+    ###############################################################################
+    # Data collection and replay buffer
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # Values suitable for a proper training run are given in the inline comments
+    # next to the reduced values used here.
+    #
+    # Total frames collected in the environment. In other implementations, the
+    # user defines a maximum number of episodes.
+    # This is harder to do with our data collectors since they return batches
+    # of N collected frames, where N is a constant.
+    # However, one can easily get the same restriction on number of episodes by
+    # breaking the training loop when a certain number of
+    # episodes has been collected.
+    total_frames = 4096  # 500000
+
+    ###############################################################################
+    # Random frames used to initialize the replay buffer.
+    init_random_frames = 100  # 1000
+
+    ###############################################################################
+    # Frames in each batch collected.
+    frames_per_batch = 32  # 128
+
+    ###############################################################################
+    # Frames sampled from the replay buffer at each optimization step
+    batch_size = 32  # 256
+
+    ###############################################################################
+    # Size of the replay buffer in terms of frames
+    buffer_size = min(total_frames, 100000)
+
+    ###############################################################################
+    # Number of environments run in parallel in each data collector
+    num_workers = 2  # 8
+    num_collectors = 2  # 4
+
+    ###############################################################################
+    # Environment and exploration
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    #
+    # We set the initial and final value of the epsilon factor in Epsilon-greedy
+    # exploration.
+ # Since our policy is deterministic, exploration is crucial: without it, the + # only source of randomness would be the environment reset. + + eps_greedy_val = 0.1 + eps_greedy_val_env = 0.005 + + ############################################################################### + # To speed up learning, we set the bias of the last layer of our value network + # to a predefined value (this is not mandatory) + init_bias = 2.0 + + ############################################################################### + # .. note:: + # For fast rendering of the tutorial ``total_frames`` hyperparameter + # was set to a very low number. To get a reasonable performance, use a greater + # value e.g. 500000 + # + + ############################################################################### + # Building a Trainer + # ------------------ + # + # TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the + # following keyword-only arguments: + # + # - ``collector`` + # - ``loss_module`` + # - ``optimizer`` + # - ``logger``: A logger can be + # - ``total_frames``: this parameter defines the lifespan of the trainer. + # - ``frame_skip``: when a frame-skip is used, the collector must be made + # aware of it in order to accurately count the number of frames + # collected etc. Making the trainer aware of this parameter is not + # mandatory but helps to have a fairer comparison between settings where + # the total number of frames (budget) is fixed but the frame-skip is + # variable. + + stats = get_norm_stats() + test_env = make_env(parallel=False, obs_norm_sd=stats) + # Get model + actor, actor_explore = make_model(test_env) + loss_module, target_net_updater = get_loss_module(actor, gamma) + target_net_updater.init_() + + collector = get_collector( + stats, num_collectors, actor_explore, frames_per_batch, total_frames, device ) - obs_norm_sd = test_env.transform[-1].state_dict() - # let's check that normalizing constants have a size of ``[C, 1, 1]`` where - # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). - print("state dict of the observation norm:", obs_norm_sd) - return obs_norm_sd - - -############################################################################### -# Building the model (Deep Q-network) -# ----------------------------------- -# -# The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` -# object which is a simple CNN followed by a two-layer MLP. The only trick used -# here is that the action values (i.e. left and right action value) are -# computed using -# -# .. math:: -# -# val = b(obs) + v(obs) - \mathbb{E}[v(obs)] -# -# where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a -# :math:`\# obs \rightarrow num_actions` function. -# -# Our network is wrapped in a :class:`torchrl.modules.QValueActor`, -# which will read the state-action -# values, pick up the one with the maximum value and write all those results -# in the input :class:`tensordict.TensorDict`. 
-# - - -def make_model(dummy_env): - cnn_kwargs = { - "num_cells": [32, 64, 64], - "kernel_sizes": [6, 4, 3], - "strides": [2, 2, 1], - "activation_class": nn.ELU, - # This can be used to reduce the size of the last layer of the CNN - # "squeeze_output": True, - # "aggregator_class": nn.AdaptiveAvgPool2d, - # "aggregator_kwargs": {"output_size": (1, 1)}, - } - mlp_kwargs = { - "depth": 2, - "num_cells": [ - 64, - 64, - ], - "activation_class": nn.ELU, - } - net = DuelingCnnDQNet( - dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs - ).to(device) - net.value[-1].bias.data.fill_(init_bias) - - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) - # init actor: because the model is composed of lazy conv/linear layers, - # we must pass a fake batch of data through it to instantiate them. - tensordict = dummy_env.fake_tensordict() - actor(tensordict) - - # we wrap our actor in an EGreedyWrapper for data collection - actor_explore = EGreedyWrapper( - actor, - annealing_num_steps=total_frames, - eps_init=eps_greedy_val, - eps_end=eps_greedy_val_env, + optimizer = torch.optim.Adam( + loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas ) + exp_name = f"dqn_exp_{uuid.uuid1()}" + logger = CSVLogger(exp_name=exp_name, log_dir="./") - return actor, actor_explore - - -############################################################################### -# Collecting and storing data -# --------------------------- -# -# Replay buffers -# ~~~~~~~~~~~~~~ -# -# Replay buffers play a central role in off-policy RL algorithms such as DQN. -# They constitute the dataset we will be sampling from during training. -# -# Here, we will use a regular sampling strategy, although a prioritized RB -# could improve the performance significantly. -# -# We place the storage on disk using -# :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This -# storage is created in a lazy manner: it will only be instantiated once the -# first batch of data is passed to it. -# -# The only requirement of this storage is that the data passed to it at write -# time must always have the same shape. - - -def get_replay_buffer(buffer_size, n_optim, batch_size): - replay_buffer = TensorDictReplayBuffer( - batch_size=batch_size, - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, - ) - return replay_buffer - - -############################################################################### -# Data collector -# ~~~~~~~~~~~~~~ -# -# As in `PPO ` and -# `DDPG `, we will be using -# a data collector as a dataloader in the outer loop. -# -# We choose the following configuration: we will be running a series of -# parallel environments synchronously in parallel in different collectors, -# themselves running in parallel but asynchronously. -# The advantage of this configuration is that we can balance the amount of -# compute that is executed in batch with what we want to be executed -# asynchronously. We encourage the reader to experiment how the collection -# speed is impacted by modifying the number of collectors (ie the number of -# environment constructors passed to the collector) and the number of -# environment executed in parallel in each collector (controlled by the -# ``num_workers`` hyperparameter). -# -# When building the collector, we can choose on which device we want the -# environment and policy to execute the operations through the ``device`` -# keyword argument. 
The ``storing_devices`` argument will modify the -# location of the data being collected: if the batches that we are gathering -# have a considerable size, we may want to store them on a different location -# than the device where the computation is happening. For asynchronous data -# collectors such as ours, different storing devices mean that the data that -# we collect won't sit on the same device each time, which is something that -# out training loop must account for. For simplicity, we set the devices to -# the same value for all sub-collectors. - - -def get_collector( - obs_norm_sd, num_collectors, actor_explore, frames_per_batch, total_frames, device -): - data_collector = MultiaSyncDataCollector( - [ - make_env(parallel=True, obs_norm_sd=obs_norm_sd), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, + trainer = Trainer( + collector=collector, total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. Below is an example of - # heterogeneous devices - device=device, - storing_device=device, - split_trajs=False, - postproc=MultiStep(gamma=gamma, n_steps=5), + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=logger, + optim_steps_per_batch=n_optim, ) - return data_collector - - -############################################################################### -# Loss function -# ------------- -# -# Building our loss function is straightforward: we only need to provide -# the model and a bunch of hyperparameters to the DQNLoss class. -# -# Target parameters -# ~~~~~~~~~~~~~~~~~ -# -# Many off-policy RL algorithms use the concept of "target parameters" when it -# comes to estimate the value of the next state or state-action pair. -# The target parameters are lagged copies of the model parameters. Because -# their predictions mismatch those of the current model configuration, they -# help learning by putting a pessimistic bound on the value being estimated. -# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous -# in similar algorithms. -# - - -def get_loss_module(actor, gamma): - loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) - target_updater = SoftUpdate(loss_module) - return loss_module, target_updater - - -############################################################################### -# Hyperparameters -# --------------- -# -# Let's start with our hyperparameters. The following setting should work well -# in practice, and the performance of the algorithm should hopefully not be -# too sensitive to slight variations of these. - -device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - -############################################################################### -# Optimizer -# ~~~~~~~~~ - -# the learning rate of the optimizer -lr = 2e-3 -# weight decay -wd = 1e-5 -# the beta parameters of Adam -betas = (0.9, 0.999) -# Optimization steps per batch collected (aka UPD or updates per data) -n_optim = 8 - -############################################################################### -# DQN parameters -# ~~~~~~~~~~~~~~ -# gamma decay factor -gamma = 0.99 - -############################################################################### -# Smooth target network update decay parameter. 
-# This loosely corresponds to a 1/tau interval with hard target network -# update -tau = 0.02 - -############################################################################### -# Data collection and replay buffer -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Values to be used for proper training have been commented. -# -# Total frames collected in the environment. In other implementations, the -# user defines a maximum number of episodes. -# This is harder to do with our data collectors since they return batches -# of N collected frames, where N is a constant. -# However, one can easily get the same restriction on number of episodes by -# breaking the training loop when a certain number -# episodes has been collected. -total_frames = 4096 # 500000 - -############################################################################### -# Random frames used to initialize the replay buffer. -init_random_frames = 100 # 1000 - -############################################################################### -# Frames in each batch collected. -frames_per_batch = 32 # 128 - -############################################################################### -# Frames sampled from the replay buffer at each optimization step -batch_size = 32 # 256 - -############################################################################### -# Size of the replay buffer in terms of frames -buffer_size = min(total_frames, 100000) - -############################################################################### -# Number of environments run in parallel in each data collector -num_workers = 2 # 8 -num_collectors = 2 # 4 - -############################################################################### -# Environment and exploration -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# We set the initial and final value of the epsilon factor in Epsilon-greedy -# exploration. -# Since our policy is deterministic, exploration is crucial: without it, the -# only source of randomness would be the environment reset. - -eps_greedy_val = 0.1 -eps_greedy_val_env = 0.005 - -############################################################################### -# To speed up learning, we set the bias of the last layer of our value network -# to a predefined value (this is not mandatory) -init_bias = 2.0 - -############################################################################### -# .. note:: -# For fast rendering of the tutorial ``total_frames`` hyperparameter -# was set to a very low number. To get a reasonable performance, use a greater -# value e.g. 500000 -# - -############################################################################### -# Building a Trainer -# ------------------ -# -# TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the -# following keyword-only arguments: -# -# - ``collector`` -# - ``loss_module`` -# - ``optimizer`` -# - ``logger``: A logger can be -# - ``total_frames``: this parameter defines the lifespan of the trainer. -# - ``frame_skip``: when a frame-skip is used, the collector must be made -# aware of it in order to accurately count the number of frames -# collected etc. Making the trainer aware of this parameter is not -# mandatory but helps to have a fairer comparison between settings where -# the total number of frames (budget) is fixed but the frame-skip is -# variable. 
- -stats = get_norm_stats() -test_env = make_env(parallel=False, obs_norm_sd=stats) -# Get model -actor, actor_explore = make_model(test_env) -loss_module, target_net_updater = get_loss_module(actor, gamma) -target_net_updater.init_() - -collector = get_collector( - stats, num_collectors, actor_explore, frames_per_batch, total_frames, device -) -optimizer = torch.optim.Adam( - loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas -) -exp_name = f"dqn_exp_{uuid.uuid1()}" -logger = CSVLogger(exp_name=exp_name, log_dir="./") - -trainer = Trainer( - collector=collector, - total_frames=total_frames, - frame_skip=1, - loss_module=loss_module, - optimizer=optimizer, - logger=logger, - optim_steps_per_batch=n_optim, -) - -############################################################################### -# Registering hooks -# ~~~~~~~~~~~~~~~~~ -# -# Registering hooks can be achieved in two separate ways: -# -# - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` -# method is the first choice. One just needs to provide the trainer as input -# and the hook will be registered with a default name at a default location. -# For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` -# requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which -# can be cumbersome to implement. -buffer_hook = ReplayBufferTrainer( - get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), - flatten_tensordicts=True, -) -buffer_hook.register(trainer) -weight_updater = UpdateWeights(collector, update_weights_interval=1) -weight_updater.register(trainer) -recorder = Recorder( - record_interval=100, # log every 100 optimization steps - record_frames=10_000, # maximum number of frames in the record - frame_skip=1, - policy_exploration=actor_explore, - environment=test_env, - exploration_mode="mode", - log_keys=[("next", "reward")], - out_keys={("next", "reward"): "rewards"}, - log_pbar=True, -) -recorder.register(trainer) - -############################################################################### -# - Any callable (including :class:`torchrl.trainers.TrainerHookBase` -# subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. -# In this case, a location must be explicitely passed (). This method gives -# more control over the location of the hook but it also requires more -# understanding of the Trainer mechanism. -# Check the `trainer documentation `_ -# for a detailed description of the trainer hooks. -# -trainer.register_op("post_optim", target_net_updater.step) - -############################################################################### -# We can log the training rewards too. Note that this is of limited interest -# with CartPole, as rewards are always 1. The discounted sum of rewards is miximised -# not by getting higher rewards but by keeping the cart-pole alive for longer. -# This will be reflected by the `total_rewards` value displayed in the progress bar. -# -log_reward = LogReward() -log_reward.register(trainer) - -############################################################################### -# .. note:: -# It is possible to link multiple optimizers to the trainer if needed. -# In this case, each optimizer will be tied to a field in the loss dictionary. -# Check the :class:`torchrl.trainers.OptimizerHook` to learn more. -# -# Here we are, ready to train our algorithm! A simple call to -# ``trainer.train()`` and we'll be getting our results logged in. 
-# -trainer.train() - -############################################################################### -# We can now quickly check the CSVs with the results. - - -def print_csv_files_in_folder(folder_path): - """ - Find all CSV files in a folder and return the first 10 lines of each file as a string. - - Args: - folder_path (str): The relative path to the folder. - Returns: - str: A string containing the first 10 lines of each CSV file in the folder. - """ - csv_files = [] - output_str = "" - for file in os.listdir(folder_path): - if file.endswith(".csv"): - csv_files.append(os.path.join(folder_path, file)) - for csv_file in csv_files: - output_str += f"File: {csv_file}\n" - with open(csv_file, "r") as f: - for i, line in enumerate(f): - if i == 10: - break - output_str += line.strip() + "\n" - output_str += "\n" - return output_str - - -print_csv_files_in_folder(logger.experiment.log_dir) - -############################################################################### -# Conclusion and possible improvements -# ------------------------------------ -# -# In this tutorial we have learned: -# -# - How to write a Trainer, including building its components and registering -# them in the trainer; -# - How to code a DQN algorithm, including how to create a policy that picks -# up the action with the highest value with -# :class:`torchrl.modules.QValueNetwork`; -# - How to build a multiprocessed data collector; -# -# Possible improvements to this tutorial could include: -# -# - Using the :class:`torchrl.data.MultiStep` -# post-processing. Multi-step will project an action -# to the :math:`n^{th}` following step, and create a discounted sum of the -# rewards in between. This trick can make the algorithm noticeably less -# myopic (although the reward is then biased). To use this, simply -# create the collector with -# -# >>> from torchrl.data.postprocs.postprocs import MultiStep -# >>> collector = CollectorClass(..., postproc=MultiStep(gamma, n)) -# -# where ``n`` is the number of looking-forward steps. Pay attention to the -# fact that the ``gamma`` factor has to be corrected by the number of -# steps till the next observation when being passed to -# ``vec_td_lambda_advantage_estimate``: -# -# >>> gamma = gamma ** tensordict["steps_to_next_obs"] -# -# - A prioritized replay buffer could also be used. This will give a -# higher priority to samples that have the worst value accuracy. -# Learn more on the `replay buffer section `_ -# of the documentation. -# - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` -# for more information). -# - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. + ############################################################################### + # Registering hooks + # ~~~~~~~~~~~~~~~~~ + # + # Registering hooks can be achieved in two separate ways: + # + # - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` + # method is the first choice. One just needs to provide the trainer as input + # and the hook will be registered with a default name at a default location. + # For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` + # requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which + # can be cumbersome to implement. 
+    buffer_hook = ReplayBufferTrainer(
+        get_replay_buffer(buffer_size, n_optim, batch_size=batch_size),
+        flatten_tensordicts=True,
+    )
+    buffer_hook.register(trainer)
+    weight_updater = UpdateWeights(collector, update_weights_interval=1)
+    weight_updater.register(trainer)
+    recorder = Recorder(
+        record_interval=1,  # record (and log) every optimization step
+        record_frames=10_000,  # maximum number of frames in the record
+        frame_skip=1,
+        policy_exploration=actor_explore,
+        environment=test_env,
+        exploration_mode="mode",
+        log_keys=[("next", "reward")],
+        out_keys={("next", "reward"): "rewards"},
+        log_pbar=True,
+    )
+    recorder.register(trainer)
+
+    ###############################################################################
+    # - Any callable (including :class:`torchrl.trainers.TrainerHookBase`
+    #   subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`.
+    #   In this case, a location must be explicitly passed (here, ``"post_optim"``).
+    #   This method gives more control over the location of the hook but it also
+    #   requires more understanding of the Trainer mechanism.
+    #   Check the `trainer documentation `_
+    #   for a detailed description of the trainer hooks.
+    #
+    trainer.register_op("post_optim", target_net_updater.step)
+
+    ###############################################################################
+    # We can log the training rewards too. Note that this is of limited interest
+    # with CartPole, as rewards are always 1. The discounted sum of rewards is maximised
+    # not by getting higher rewards but by keeping the cart-pole alive for longer.
+    # This will be reflected by the ``total_rewards`` value displayed in the progress bar.
+    #
+    log_reward = LogReward(log_pbar=True)
+    log_reward.register(trainer)
+
+    ###############################################################################
+    # .. note::
+    #   It is possible to link multiple optimizers to the trainer if needed.
+    #   In this case, each optimizer will be tied to a field in the loss dictionary.
+    #   Check the :class:`torchrl.trainers.OptimizerHook` to learn more.
+    #
+    # Here we are, ready to train our algorithm! A simple call to
+    # ``trainer.train()`` and we'll be getting our results logged.
+    #
+    trainer.train()
+
+    ###############################################################################
+    # We can now quickly check the CSVs with the results.
+
+    def print_csv_files_in_folder(folder_path):
+        """
+        Find all CSV files in a folder and return the first 10 lines of each file as a string.
+
+        Args:
+            folder_path (str): The relative path to the folder.
+
+        Returns:
+            str: A string containing the first 10 lines of each CSV file in the folder.
+        """
+        csv_files = []
+        output_str = ""
+        for file in os.listdir(folder_path):
+            if file.endswith(".csv"):
+                csv_files.append(os.path.join(folder_path, file))
+        for csv_file in csv_files:
+            output_str += f"File: {csv_file}\n"
+            with open(csv_file, "r") as f:
+                for i, line in enumerate(f):
+                    if i == 10:
+                        break
+                    output_str += line.strip() + "\n"
+            output_str += "\n"
+        return output_str
+
+    print_csv_files_in_folder(logger.experiment.log_dir)
+
+    ###############################################################################
+    # Conclusion and possible improvements
+    # ------------------------------------
+    #
+    # In this tutorial we have learned:
+    #
+    # - How to write a Trainer, including building its components and registering
+    #   them in the trainer;
+    # - How to code a DQN algorithm, including how to create a policy that picks
+    #   the action with the highest value using
+    #   :class:`torchrl.modules.QValueActor`;
+    # - How to build a multiprocessed data collector;
+    #
+    # Possible improvements to this tutorial could include:
+    #
+    # - Using the :class:`torchrl.data.MultiStep`
+    #   post-processing. Multi-step will project an action
+    #   to the :math:`n^{th}` following step, and create a discounted sum of the
+    #   rewards in between. This trick can make the algorithm noticeably less
+    #   myopic (although the reward is then biased). To use this, simply
+    #   create the collector with
+    #
+    #   >>> from torchrl.data.postprocs.postprocs import MultiStep
+    #   >>> collector = CollectorClass(..., postproc=MultiStep(gamma, n))
+    #
+    #   where ``n`` is the number of forward-looking steps. Pay attention to the
+    #   fact that the ``gamma`` factor has to be corrected by the number of
+    #   steps till the next observation when being passed to
+    #   ``vec_td_lambda_advantage_estimate``:
+    #
+    #   >>> gamma = gamma ** tensordict["steps_to_next_obs"]
+    #
+    # - A prioritized replay buffer could also be used. This will give a
+    #   higher priority to samples that have the worst value accuracy
+    #   (see the sketch after this list).
+    #   Learn more in the `replay buffer section `_
+    #   of the documentation.
+    # - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss`
+    #   for more information).
+    # - Fancier exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers.
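+    #
+    # As a sketch of the prioritized replay buffer mentioned above (the
+    # ``alpha``/``beta`` values below are only an illustration, and the sample
+    # priorities would still need to be refreshed from the loss values, e.g.
+    # through the ``update_priority`` hook of
+    # :class:`torchrl.trainers.ReplayBufferTrainer`):
+    #
+    #   >>> from torchrl.data.replay_buffers.samplers import PrioritizedSampler
+    #   >>> prb = TensorDictReplayBuffer(
+    #   ...     storage=LazyMemmapStorage(buffer_size),
+    #   ...     sampler=PrioritizedSampler(buffer_size, alpha=0.7, beta=0.5),
+    #   ...     batch_size=batch_size,
+    #   ... )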
From ac6c83b7e7fedfbdea342218ccb4b96e6cdf8687 Mon Sep 17 00:00:00 2001
From: vmoens 
Date: Tue, 4 Apr 2023 09:19:38 +0100
Subject: [PATCH 74/89] amend

---
 docs/source/_static/img/replaybuffer_traj.png | Bin 0 -> 252140 bytes
 torchrl/objectives/common.py | 3 +-
 torchrl/trainers/trainers.py | 15 +-
 tutorials/sphinx-tutorials/coding_dqn.py | 1377 +++++++++--------
 4 files changed, 704 insertions(+), 691 deletions(-)
 create mode 100644 docs/source/_static/img/replaybuffer_traj.png

diff --git a/docs/source/_static/img/replaybuffer_traj.png b/docs/source/_static/img/replaybuffer_traj.png
new file mode 100644
index 0000000000000000000000000000000000000000..64773ee8f784895bc0c3a6c8b4a390bbfa1ed7ee
Binary files /dev/null and b/docs/source/_static/img/replaybuffer_traj.png differ
z&nTS^H1F7BvWbbgq_vQL8Nwh9$|)%WCn|dgy{&!Naaqx5A@ihKxb+^s4E9S}*w9I{*06|E zeRH8|T@YTPm2M0M`KK#ov^!@#cC2e~u}T7}>^d@qISzzQukaMu8ez9FsA+{^CjlcH z9eHb8%e%(lRwLl3I)M@G*qJ`l5;hqaHNDLlD^1meI6wzU(BqiHOE3F;bu_bNc?B%6 zA9@tUr^<?185GH$@(S z-LWS>TL}d`^!>jR;Nbx>9AigJP>2#v{t0K%zNC98B_-u+pmg$ne(Dpm#6jcs0Gmmt zCg$^=ngVV!tin+tU&QWiqemRXl&;=uWiha2IB=h=@dt3rum_klD&Hpu2T!uDGQI`+ zI+qn7M1cxaHClOK8bu+4tYsb+MIN)`X^=rXR(ve?2x@&`vyIX=l=u*+Iy<+;DWWfj*JbpZsJS?>5Lxf(w(r?g<+i%C6cZm`JYw!t!yl1nR-#rIT>p~|ahH*3RN#EzBf$8|_V#9G>Zubi z6g3J{-xvv}o;CRlO)RbK9Jx$U8zFmSK(L3w$#RN!hAwhP`bd=j<86eR9oCCh`q&)8 z$w95cMDKI8AFzJFP{Lf9YQ#HMr=56*&1yfos`j4@>l+7qn^9>m}Yqz6yt( zEsXcW8O2N|jKR9P+3==QV^3Zyhf;A&gd6(wu|U=$7uKscHM2M+iDJ6B;by(*e%3X4 z;$M|MFkDRC*QB|#y2k71A|@Y_yx<+hJYhn&%feHZ(doB zeTq1|dc*zvc#NGj_V@jmx!k+Yd8tw}MSc~7j&RJqk%zH*_nsH`yG8vz^_IY!k7J~l z>>@&n1N5)N;3;^dYqip%ir;}IvrjO9QHRfE$aR11QzjhPmZjKEAcG;#MfE=0|6&8Y zXuYI-^y~FmbJ7-ns$Wh^eDG%{+HB}7gfHU1kckzZB36Xf_zj>>w?YOtkL=v{x$jh; z{9WI_>XBjfN>SPQ@V;_xZb>~Tqps5f-Gd8|s-PI{3sweCGbZ8kH_0hIs@GL|7t#@Q ziTQKUi+4x58AyE^w+s>pw!d1$B^;TLavb1I!{e#970=; ztgGFg0Hp*fLl3S?`9-8H|4wcN4CwLl3%#7itn=2D=epV| z88R)Q#4|MyX%FxJKD#k|cBp#4{P5F_zEvAIaY?z|@uHw5Y{N1>ify>7ihp4Z`usUN z6Om-ncx>?zltLUI=KITOoFWu*Xg^|A%2X56tVNVso|c{l458Pca*S^8=H$5_Yw1SS zb3!Q_I&{uO&6M_zQR8TQMm;)q1@>D7(o`&%%i=JxT`}A9%lsLjaUAA66{yIc=Qd8= zA-)_`EBK{WW(sdb01=K|UHM|J%B<27hC~$TSU?S1EQ5o8j2vL>S$*~cwH3iW;f>$U z6fB>=>S;!I=j-K5e<|yHRlr3UsKWwN%0V<#4Qt1RHic1jv3{G%nZC;#Au?4(`?IzL z!Puq@V+E2eyF67AyZBM7Gf*TF6rmTCuQV&YT8>iPq*c*KXs0mDNbi^<(=_w!G8DGC zSSUe{L#Xv7mlEhDtf!Ob|sx92gq4v0lCR=0%G2Y1GzhUZz&s*Ytmn z1kUkgZ|Vd<$_Nvu0IF#>tibH#(U>bJS(|=rGCT>-VSfMW<(iax(h|N!D`fR;I3|`& z#=SD@uVYPzw@R&?sz?WneA_l48$Mo&R(1q2U5?EMkJ=jR1|vCAf)0K9L;`bv1e9_Flkw zRRU-sKs=x#w{Ax=sskI4-5cxFL2(aS<{!mKGevbwSyOC0PBr;$Sft8;<9{)`4hiWF zyS_oE&_9^85-ORt%4%h%zZyotDO74H2g=o0+~gd}%4~xddiAxk;^O?A%H#dD;|%w| z)OP)_Z-4a5&KhbY);4QWU7^i$zZ--;2sT|2A(h^sz(8Vdn@J{Kx(4v|ZgmJY%Uh=H>n~HFhAzm7Me0i++j<+Z8Q{T|Sbt|ee%7)w zDqE8{7|@-ts_$B7R74M)|?UCqmM`- zHEY~pS1!vGBp`+OHHHo2o|!)N-CK|aDpy+epd`c8czz~ao_oE)wvqSm$>fXPyz6v- z8z?%aHCOhW;<~^Z#>>x7d^&j1El%nkerW-mO#FEs|8STsyp=6JSvH`qAg;AWPtphjbwi` zayxsh`#j%^zRuZYr6ZsM=@A_}$C?kt1EH%c$WyUbJXc!6hR(&c&h;it+ z3jTH3y?A#?Hu1Xj(#rFdoer#2pL?Df@j~*43&MAtTU#hWkIF^0kKS0*v%eGgIGSff zwA8J{dD6v-#A*ek#qmb5!p54^oa&s_;p=aLVZ~;&kT%{?6ya}R0fMakT@yqI0_yuJ zL=B7iyql~VX}{KRHUfU;$yvLIU-|ZP$mu%T&Uk(Yl=-!`8^-h_Q|r3q_BQ~Y0)$4r zRw%xw5n3oAKR+p^NFyNvxOO($~0l5EIJ$#y*i} z#P- zfv@S`1oMYxV^rfHFO!qkYI5do0Bo&%eL7`JvQY&{RWkj=5f~1Dgbj9VJ|f!-!}tdx zhD&}6kr_`{|H{vVcu7T#1fid1RYjC=xR=fMMMPLf$0yV}xIrmmd0*Z9XQMd)yDv(@ zxk;S565xtZC9!WwxWKVWZ-POGEO26G#&b?hD=*W+n61g8{I|&CAZS;6CJCdxyLRC=n`Ez7SP>nHQ0S#JZtG<&TJ8&bTygh0dd}XkS&BaUiy#}q&B49iwPsYC8+D0MD!?t21 zJ`vW2HLgxg&^Uow6!?y6!$ROg05#7}fE5GPiqUGU+A$0_@L*%o$Z8#NvNeoe21SYU zbEibsR}nJW%wOU5#mr8*1#x8L{M#>De?T?&hUOKbh~1B{FAn3sR!C}opmDPYSRL8t z1v91sK1|!606*P+=CWD=6?Pd1e@KDdk6&*4(`Dxt$Kdr47gbsAj?(T}zRStsBgBg4i!J^;9PeGWkWx$68<4++ua22L9333~0}lgf zz_-=doB-Kk^_$prpQkt!VXFZj(m*J1_rPLB<-R(`alse6lAF@Qq*Wyk&nO&$xIj29 zipUu&dho(zg2_`ejrYZb`TsI}uVKNLp|qmvSRZ-Yg^<3qxXyM2&L2AmXuE7R!!%;I z(-rN!t#AE|JC{g@{9AU3s+ht-(AleX{V0}9P5q{_2G6BQ=2H{i@LwdS*u>mOiU>$xKBlI zn+W~ydBq3Zl?7$L>xVdt5|_@l(56N+I{EmuFU*PBy7R3r6b%wWt?^`S8D$c%FGL7> z!6(8ROSWPE7I?^l`@X0dYHfHDQk1FKW7mKgkZAkf67?i6L{m(m>YRniN~O!#{@9T* zrqAa9+s-Xkp@bf}qXP&^G0E zp5K7!;o+&aFJ@N{&|raK-iHAms4S(I zWFXSu^+Hxl-}4h(_X2If;p|9!EkhPo15{>%`?xV`RK0`1sfGTgmdCTO7!V!gs1uhk z&ketEp8q?Ls&l4zu|n*3ti*@yRG*UPB2CRwq7f$(=+H1_&fs=_JZThl-d}WXqR`GO zQI7MHwycCbk42Y*bPj_Xg4%11hQ}@BA$KP~xG&OY{@Sd3)EZ*%w)ilhbtLwxulPX* 
znnXiH%2GDg?FRO{o@SNPO1m#jZIv(sefe)H?mFK7FM#C-0(+5kfx3x!%({X4gX}U# zR5v{_4Isat35o~0R4j^m+($sZS}&nhRkPi7fD?4NVxkuy-?eps39#IGINI_kWW?V0 zTX@?03TgfD_g#lggaY+a9HQX^rVtvu`mgf+0((!8|6wZ|Bfaz?FqrUo9&_+ze^hLD zfgDR<3bY*hsdD+z9Q^(Ukp<<$0$%ph1u125a)v5lNE*zqW}Y{O@e=rb3y1JpZG6Ji zGtnpVN4I)jk2STYao3qj{wT8nMYx>my>%Du*TpbF(WHa_up82fpT5EFQQ;9Cw|OM^ z!+g!y^_OFhChQme79174gB3;fYlDrsG+ys%TB>8W?KXE)Jq6a;VmY)|ac+Z8)N~Gr z>aJFLAT2B{qW`eg*{Ko3#OPT)!_N2f>hpSyzT%!0p25J#C=x#Zm2(i#TYY`VO4vNJPI`nxWP77fY<~_A?{!1Fr7~Z zr@r@<+rUSuWHe|p*OOfoo0(k_6YrXQz70{HHC9b`-k~Z->w6mg?A1O~1-E5cY!18z z_M4t&&xJ&jQ0e|we{}p;j_HhtWcw#%Jh!z=d}^`~_{_t-%Ir_iQNg`=&Uf2C!SWOP z+VhJ5*;?sqdz#E&t0yd~{h|Kpp@el`l!~CRJlWP~Ywn&SKluwxPlcK15N&GatTKQn zha0+ZefQX9JB)qp=OBer3rJl-tJ}9{m8`GYfVYv7zh@Fr@xt+$^S17l$?n?w+H|)= zPUU!Q6~@YC+^&2<-WQpU5~LG&Nfx1vvq`GaUxVbi0?kI-RjtE6svoB!2J5{}{ztwe zaftuVq2cQrhIfNwqJ8W`>u?5*L+f3bZe`Dlb_X3w(FSm!@z?HACSnWH1^;&f5eH@= z>jh#G%&i~(XC)ba(b^u3?r9PT#oa1vs7AfmZ@1+=gkvgk6HE)YC!FFKI#%3^-yRYW zqt1SsovJ*=I8%Bj^AIoqxE3lolFK?b4uB_&L>z~EdDwyAmH*+yyGwgOea;*lf2b#2 zTJLN5$(ArEv}y;+Hf0AiN`+j4su$5v&)(+USh3;uy!?)Nq@pu;Tk<;Qz&G9?14c1@ zRzNenB9}iF2aefL(_R^uM}wIVGi=V9D_-^h_0^;8-d1-DGjiOj!a}kz*T=4BLw=pQ*DMH++cDurdTreHiE?+V{#or1^XYSOC>MbLKBn zF;dM-{CCQIosQ0lhDLUSu}Ce4DQYFN3h3yf5p&iUILNZH;*v^0`uzPN{S1K|79|5W zoSMNDma`b-86c;i(8$x%*?DX9R1i+;O1rmyEuR)aNj3QL6Jo;QJB1pGiZCWuCm^v; zecrB0VTo>LePhoF{0;yviT|Ap@&!{Be<&qA-O$D++#U%Od?xg=tJz85soM-Z4NE$K zqX0u0h6VL)#EO!Ynn$4!2^-23nrP^x)JN zz_E@axcJ_gSy@t9LI2+I-QtA`t(TY+U&71odXWG6g6e}@X6rR2kSW^p6Bg>Vy^3xF z)bt>@90Znz60H++YzKblaeIhsOI!tvL#d@D*knaIAS!^AG=&M!W)%}KjWs6-j(<%L z5W*Me2yOh;M6FwOziXJ=wjOR=*lCJ5Nhn~+RoYr2E5adx?(pp`P)|G^*F@e!D8$eXchQ?}0TiW(1Ep_u zD#LTBQ+?0p3#diox?VS#WsvR0%b8MXL42&>tdtCRroYSS z*a;3{_<|4*)fuEUSm^c2FRC2t0Dsq$&+6~3y#fvnxAfMZL}ggH0;r}?24|Qc zjjB})!1X8@DuvKoW-#P&aV^?aNl@dfnv{)xsQ=u*BvNn*-38U==jWgH_b<|`3*4TN z{H=~NTB*6sop;xw_i5Og2Q*o`${VL&Gc&QrkcnSAoIoUJzyW%2}P%QcjOlw|VncHBEGJAqwo zCU%Yx>kAEW0ru1CVCyGl*6C5(87B~qJXf|AjcX5xEDx^v$QPdY7nn@-MLbKoE?%{X zcAqL4^tVuP3P>~nrj91~iVR&1Mt zJ{$I&{4hJUq~{Oj4)j*a~T z1uU<=-$>2J=f2zr2h}^gXeVanGXWyhMn!HnFLXJERP5?g5?L?#x(Ik0{R#WmuxP#= z>bE7|Ndd)95FKze$C993lNW2=&>`I_ZRn(|q|jvx z-NK+np)g^l3KO5Iz-2`wI*3ryT81HSn86u^l?F8L6`SbG5H7eJ-d;oEz)OH(nA7@# z1`FgS@$K>c3|T3PQ0Rn;p!-F|m<$j{gW<7C3ZYChvoVz51z}O(G>5HH{x*i9tV9lK zzHO)Q+vfS8ZoOFaNk%~ohJ8bW#qIjHSTfDj#6-2jHm7CuQxH$VePxLpL$909#9Yjs zDnAanwmtTL-S)r+s+8SZX~f@2MA>pwW!_!;C0F@ zlmRE7kM0BGp=}jY(H&ynbDt9aI6bMKcF;2kFo*u3{U|$pK^7mx*b1QuIno||FVY)6e2N@IBD#k zldDr*Ys+MD9^8BwA`Zc(RkBGg5wStQYfQrL@(Gci^d)L@f0?#j3i~6TCfX@~_?oB@Ol2)76uo3L04zr`2|lekPAGUAJ6P zLik<-l&7Cq)>y0qV?%t1GCvRs>>Lk=Ngu=AL08;;kslg>;QwKcD_IB&qH-DOE-D^2 zF9n0DU8rh~Qf&HNtII2Yx9Yd@@D3_MIkf-NBXG{EKBu9WCVhtKkXb-0cR$BJ9n1z5 zW^jXW*M+rtgYadxh1Jbg&dY5i9OyrMINz~4n>SLeXa=5PJsPEX1}^a!n1TEL+B?0L;rX8IWGk9sK=qKBv5v*^P4hVny;N{vWd5 z0<7va*dE5fKm?^lKt!ZlL`6!aJEgn3I|Tu0X(=gb=|)0YL`sA$E#2My%?3U9{=dtk z=R8~Bm-EiMX3d(Juny#pZaLL-=xK*<>Tx*V0nr=c!>_+1Fq;;?g+`ApueQD~8ZN~8 z7!c^%@FgA~GW`_D6mg#Hyl${|pm^_v#5nD}b#H;4s= zbeonYl<c@IDvR1*D6>IQ#5h9%6&K`KLhC2@ zzEh!Aeg)dOLv0=ulj7d+^_|U|KQ$ChXI&BMuOJG5tTKQyfM!4nD_7Wf ze}NP{@Wm}aD+gAGO>Jn6aGK}@C;lN`L-M`lKz|=@0}_j-xM-LFt{~)iB}L!hQQ~Dm z^5}!Z0R`DC*`!42@<+77-KvJS`k1E?CP+|+6!}~!A}h7>j+w+2 zWhG+BmM*$ix1%yNnE60`Ogaq-{~A0+ZFWG<1ywvf_+97BZ~_A5>b?;Wg9%G1T3tXd z(T=ZbnSc%q?!MN+OhTvZ^YU--peC{=S<<1aRwCXmI+v^ffkaw30AeiEcwPsh0dU#* zI3U*MT`exJ`;8qlGMMDNb>|`$SU(|b_$c?WADsIcP+T&akwpq`&%~K;bh^*{xC8Y# za#^y9l`UkY(w1d1WT6p+aRBj_oPIw2viYiO;jogGPnF}|-S6Gv>0^x9()8kM>X+0% z#xkVUEh>*#mL}4DGcY1{w|nKU-?D%;YxABCIf6k`a7DcA|M}Bu 
z?avFd3xUDG;u7wUSoqVVOG`?0XbI&TtRsO3jnx|1yDwIC#+fXW3j%Qkb^xDr5>sf> z0@Fz(yva+)jzeLyr%$CGD8HcC->*Vps25$ggU$(_chx{*dI<~;b|qLpZ9O1Mxfi^L z#z|*|3q5sdkdY1!8g$!>p)((;!UeIOQX>iKxhbO<+n=KyZNULOsVrHfCOaX1NSR`| z3QsxFq)!W!Rn4W3ms-P=f&$Zl(*604B9$sk2t+&xki&o&QPFr4ZL^7XR^_L{)9cut z&76m5*lMxYx^r8tcJAOt07MvFE-NY7a~UhCYrTmG1f|WSkK`HoSxH|2FmA=*?wvG+ zkFjc+4dtgr;{4QhAjBaj6l!16?!0xCtY-k81)tpaFr{iv(u>G0`Bc|lSwa{#qzv;d zG=l!)W{hoDR95o`hok~|TBv?}o9ucZ{podBe;!|#TKz-0$4x{Zbgk6V2l8!#e>3M+h5#OL4owICl$2y zbMgM0&qn!11pYnxAMBcp`W{8o2e&_`o)Gv}N5VgF59p3st6GTPW_h&NZ^QyjC%>(@ za3@NTpJm*JTCzZJ@pOgz06WN#N}3in>(Iqo_z_ys52E3X%NiQNi3$}qX+XylO+oS5 zxJCl`1!^Mt3-8G_kj_1G?tVzyn#H*nCVeCjT)lMfo`8G|0WeICrg@Z6p}G4oHTj+dKR&+qI|szFhF!EF_Lq2#d~l0vyH(i4^)OU zkYuqN6~KNX!|e=6&e6!pkW7*(q)?I^69G;?$iiFY7G&0utjB+^F6Jpf+)E)>u_G_Y zgTys39pcz9O*Dur0EG<`3x+Ey8e1=&<~$%5bSgA2cYEUAhUDxfb}A~$N*}|^#M64k z0EFMHxZU_TP6Y%=5RYJCkxPt(W537^&bZ%0VBBY_l%OJzJGGRgadnufRpL`*w)kig zke)vhmdFGk#V1lOMSynmT$?_{tn+usj>i`1q1Nbo>N0#|x36E~BG-mfyG}Wam7&%U z7(Huq8<6GR+ZR+L(KpSK#%*Px4(&k)qc$^!!phAJ*>^7GpK~2s`%Z@36{DRxp}#}; z#1boH&#aGlg>v~vlU{k5n++oC1{Z0dZgBYiwWidsb-6z{zb$cr8_>lm?RJlW45H2T z?z(*jX9B+(kUsA31+ITf(SJ9TtqiiowX<|3ylZrF~_1dp4HDDltzy9sO;C8`X z&mRl!N^_{p)RNJ*d3+L6XI{SE=qEm>}Hw0((n+;WAl=WwCz zVt)nVWx}De<8eTBr2S0+GVMf)svU^u!^d7LDqF*!VKK-#J&6Z@>r5uW>xP7CDb@Vr zQ~Ka&H!hMiT>~AUq%2NdLfya8LtZ}2Tlg%@f5i?e`ZlKtC}_pkoT7qj_7sl5T~ zW5OWmw7_HaH0eCOZ0PqB|;~?+TTdjytOt-Wrp%XISv@CAKyx^fWpRfy|vx>Z&b! znsAmhjV;vm`4bJ`pRGsI5YR&-&cltfgC>Ymwv4hQVutI_WO!Z0oKc5Dl*j7RGSEG< zOIz#n-2E@01*8_1DrUD5O_iqR<0M~%-B-P|Jfq-z1M{Hz$6jiP{eJ6@!#8iUZSu02 z1Axuh1}Ay#-*S3?CCw^dI!gdvd9xW5nmIXD-CDsJkG+a^U&WT4SkDmei%a(08>Dn4QdkI!Mmpl9N_bE%>OV|7 z&SM_)AyYORfUYC)a|WZVDX_C8$;mH4Mfr_S<>==}!zAH&=dZT3k(I(IN}ilz zWZU$CrVEQE9rf?yO6pGfr;ENVl&Jv2faQII4L6=9Ybm{VE$pWAt#tv1w3-g|3a(S- zR|-Yob>FRGk5)#%QlD_-!9XgLd0V*Lx;yG5k&*PjN40S;PBK)K?e){^xV@fB0jP89 zxs3O!n^);C{iZ|89U?f-VD5dE9YcVZ9JC&?-z0_i&uVlQES_#0vcaHpSsEz6d1gWX zK5)b=KQr^&Az5}#&ZD>vFpL5i4SrUBS9P>gc$YV%I|4|E$~C zePr(+4$Nb@4==_2NWEL`Jf85ruH8#{sPyZT};3xMb>^o1<}`YffB}o zjbP>ndN@(R)HWkU<@V19jJQr0>aWm9uh2`cOn(e{PApqE>i|=-#@73k8WqLG~SIb${Z7P&9MLXMFh<`jwkkE`9M@TU>hWRS%yWpM!D= zsw%T&p{5ARw;Yu_^sH4?i5;2aiQd$pZ1AmgLX{ho5cUi}!KtHA)PmMG#cxQa())J9 z-Dp8_pdL9ZfJRakg%X*wnnjAkH9HTfHx^HSM16;(G_*5#l`OfS+j-^67cZt15NHhu zf5b31-q6^A#ur(?mhHm5peWIT(Zim{NAqn4snm;L^HQbCI6)F7D1|$hYkZQ<8ZpBk zbqGtOX-m^99~zLHJ&7R2y(X_#a3A_nZ}@*4`ToUDb_;TwimtB5p${1CKeA6dp1QIn zPkggw#G=vrw_Z?g&C$2hgDlebevv!@sE^oI9z)BC;sMdo%3AkRu12OSUT^GW z-0JEwGAIdyU}g?{*DN)AZ1qW8hf*#ZVJc$Lu|p@A047ie@zdi444B|`5<(5*h#BQ! 
zH){u*9yCdYh7I&GAf=1eZ$nlPbyu)Isx3sjaO&t~<478TgH_e$={=nijgK#xkHF(|bFJUuI4>LhdgX$~up| z>jV7q6gOV!uJiU7Y^8>9O>mWcCVIp-j4watve{wF-X3RO8zZb@RhNQT6uLoZd3xds zm15y{_8~F#JEX^;1jd;{k(GcS@5|Vjw+;diO!J3(Lm#t>QL=kNt>W%I5|SuEa%fNH zXU4sPQO#dcA^3pMP;!zui1qz#wm7le*Vf!t0TWr#(YV6|{qr`9PI;vw4+$%|eQny~ zORB6c1*RJYG>6SKI0`%{tmhYACn{v#&uO?xzQ^mRG52LUm$ zs`Guy{ndu&tM4*DRl2`46U8l9*H_{8TZpcxr5mU}p1!xY1l!7L7QnIwy1tDbIw|rOKxj8 z@k8e6{djN3_AMvZ7Ulb)+O5G^vNWSD!2Ll*)2Q$U`?JH)bVD#37uxu=KBGH6>rORO zJqmPqzKxn}aUd*j){=s5Aqg-Sq>*XZ7o=|9m#+O{wv^CkiGh?&BNMCGml3m4r37+v zgIJ3EH~J45U42A_r}p`09w&}hStl66>OpmD%ibt6)3Gve zLtxnAruR#zc%I4St)O^gF@lmghrqzCsQ~^8z(&7;5zruf)bC$dVpE^7B21soX(10< zKwDFMBR|Ss!sIvltC%EHrbNWV(~na^=`=rW`$ksxg&|~lNAVz&24#%n7I0Y^y=}(Y zWZ3>|!SBg_3eMVnf%Ohe8q#k+fbj@tIXy*l!ysjrsX)KjhWiYW!pO3LaXd|P#Vfj< zHxM8zEmd0Hc3NhJTOQ*7(b*EQ?gis7z%OTHw1V?{^P-wq@DXiXI-PQISiJ-;3-%Kg zuomwc7&Qi6JMLb>Jsw1gi=ow2YEMLz+4jlmc;p-!^Y}f+EvvShu*iJVDHojECtEG$1W0;?6EQu5ifbHd zqf(g9xiQ0X{fd{Ztu5#~p^uOW8>N0x5i}G5b>V=T>Dv4J+l|j zKv6c78RoodWi3qB#+iq4rl$w$-fHa6kW2?)EkH3m-$`uhf*J-@fI35iDnh6>=br0K zXs4EJPi6%9v9y;(2kJG1Y*NaY4H*0_r*__u=sSTw^ca6!ZvlI%F5#mqRq7C5m9PT~ zA}^EHc5x)q}iSnZn7b#tmxWe+0`&NxkSTvB-Ox``WmCC`~#Giu1GH{}!9VUHS(v za$n~-s#88%uTWty6df)r$be;FNjY*~=*PE#^F~qS$I;C-9wo7HXL(;!Tia4u8kpz{ zO+6fLzE_o?w+@pFz-?^pyG)^&*Y3|ioeyG?4hS1A>lx?} zw-5Wl+wjw3L_z2ijDhAL%>?l$?Ci1asG=uCf;I^{iwDO<8kb<>2smsFen);?e#hR| z0=M+&8gy=Ulu*Gvt{QBrE1$iZ*oDZHaF%3|=h9W5I+?W5&h`l6o;(;IZI^NP`_MqH zZwfl+{v+WkOjbmJ`)%QA^BHJmKvpZUv1#c)_NarKR(eHj@r^%5!(;O)*NUR;mo%pb zAfhR5QtPpH*Gf6@Wt#cR%6_B$>q2}YUR0GOIuYw$dG=E6G^P54kK^VoRDi9>Lj9?K z$_r9=(nGQ{N5fOqZ)|c$tateO6m(M%>t%=*c`Ac??t?z7!iAsb6@Lp}cWy8GfETVw z5Zw;y1_xE$Vd}XzzRx>eDIK}FtE!Gb2Zi!-EDd@WF7+1dJFy*le!MJCpT46+xqIon zy9Y`U#@`SHg{7=^GwR_2C8RaPGq7_x&QE7-|Ce=ofBrc}v2~P106jku_H5^2e zoSb4&k-`sQ>cLNMSl3m7zJD(1jKmN`K=BDOqNlI4L-BZ@EAXsBC2wKD2ojl#G=Qc6 z$Ia1*q@<xau6&4|qq@&X*WLmUm>6K}loV)LA}cQ+ z?&|RT{iM9CEcr98sT`>bX70=0H}D~lgW0|n_V>;Jyx&9UP7|Nqy=;f*>uVl)gKTF2 zM}+l0P_MzX2I2#*mzWC3m_n1U>!yeu?doe_(Rs-JA~N}NZOd0e-Abh51sZYW^bw+8 zsa%AvFVJ)uI<|@RI_L~cTUCO*rFR~>bwJY#=nG-*;8p);_mGm3GPNhOLl0CS)p{&G znn!M@dwY<1fUXqPX5uXT?}^5s0YHb|I5mpGCy_rV$A>)zviXS3lxFB)*r9jU!ee!; zu8S$KUgHn$N7(8Yw|fk1tgL>n?m}ZSv(I72Z3qzAIFV}5pHplXNJFh>oglrzB=z^w z2X%AH12qYJ8>QssXSbak8gSqC^cXLtp~GM;;F@bJcehI)zhU z-_a1h$OtWcQzw-x8{aHRp2+>ruqnf+$H7jlUq4r$shs<;Qqme}?W_%||4mY6L zB$3wNGZ{Y`SIz;~(2o_}Yr$fl(m&-x`ZM()dbzZ9flgc1aypUWTDtbmD^ec&8#Cyi z2q;@V7%-?p0uVU-m{BT9)0kj%tv8p;AvT*?kFRxAS>xPZ~PjWo_G+ z`#gm8e7EE+Zs_1@T<3gkd`HaYK9{gWiesjc;sv@c#~V-eM;LNNCm(ZSjWmUZhGHRn zX?DM{;-Txz&uax{WkXqL&hg7eX&#k&T;#D4KR14k?QSQF{IxbKg(<;6AIcTN?dw42uF>PG1{YZda4YUF5sGMt6u3g-*GeVCk`oo<*V>yH^o zZ6?fS zfm>mpeqPGTrqQM&K>xXOv+>#$uL|6=_k_FxhuMTl_c4U8rro)>jOv2VpZ_`T(&Pl6VO58aeuCIjat5`)h`?N<#h+byLaJ}bw3fG z)X#ZVv6)}Le}5xr*I`Lh7gtgbBOi2QB9O&N8#Ko{!ka+L`$IPuLiDTw^XOaX1F&hH zUq3g(n76b=|F%6Y7#2<_5B#WWhwr)cQtCNPAtb$y=PjvL&lkw7e3Z%DSSB9Iaf$BQ1e)0!Pm(0|nv;XmU1%Bj zBKllf=z%LK4eO>&J1!-VSuf|yn}@$fM^}!xTM&7waq;m@x<0u5-Q5M2ll((m#RE&w z&&v!o;%*b}@I_Qk6dXjTZ@~FpSinNkqohZ`PmWb6Ce@N?sTVlTMjviJ=4#&_MPFuW zK2XF*q=2rWYcgD6siE<;Ea%Oe3zi=wJy_mUcq%6)omb33RqMUYvM9wUIg40(IXQ5} z+zt_r9nptNu3a>>7I_{ym@%F_9x(KbgZS&}NcUG7lMqllT6^=;e*Gq@Al zrcI0dzdxJA-Z4~;t^$tvF&mq-dL7so$C?5izsTLYcTF#uT-nIS!tYi|7vULY$ZgcqIa8eBt6GXWd9B^@WULDMExv zZ{3prqd0-;q^+I0r%Ix6khkds9&#+-1C#ap=?|*C=YJm>3YEJ7y>*Q&4a~mn?v?~7 zL=g`9!~v4@MfTkHZ8|op_|X{C(6`%$J)u2ZS5ZY%ON%VJJ*{`%6W^bvlb@g8s!|0e zSn-mTDRHzHp*JSg|O7ZVi~4AU{@hE8E;6ZO#IQmC{@vuS4D#1Yxn>g8#_uD3g}O-9XRnR z+lU=|&DSe{7@%h5h7?)hz zvwfF?7pu%-^{8-^(r-X#v=pi%dhO2+m%Nr+FCx#3Ux>TajxT#a`|^k!2W@~oYe>OS 
z5c}^rDn#g4_acb2|>^;qtPZW?GLCP>{4bn|hdWm8A4*INY-HKB_!aqwttdZo*A z@5m=?Z2dz+#zV_4hR~q^*>7-WNsVVa;}^DNo7C~=NAKsRB~4*t3qZx)f-d++J zSA0@YjG1L^lJmaBL}K6m-JS%VQv3JzkDSg7+)K)12YB2KJ3Dqt6ePt@V_+{VUQt9F9di*tYCrZ(plhm%(4A0yVGC!`9pbb8=mCZC?9qoi>Gu#SQiyJ;Z>mO zzo>KGHFNv(#eTI6cWa9Z9ks9MGuS-s+~oRG+?A5xbi;9r-*yi(@7vuDGRn?oY4NNl zt%0STZYcfbyi6AQPduy_44h7TY+m~cI%F!BnH0Ky5YKoJ`VO%esT(ucBApdc(Ey%= z4sY^7;JTVY3e2Q=z%G54AZT0GAMmIj@8IBICO>10Xm++zXn8vOvrUjx?k@)2eKPo3 zQm&6xjPRkTT%Qg5NPj|pDAEN@GMTk*Jm^~H7!Mr{N4|P1TcJY`H~S@lCZu#KQ^co49H30$jLo$l%0$I z&Du_o>iO{$jU@)%2mNpK=ka~AW7>*=JqyT2Bz`ETbzFm6yhB9v2?4DxR$vBu%&~vQ za8J+u{;W55E02Brx!<-&Gd>@p{HjWG=tGu=n=Wk*uZl6z#QfpuR}Wp!y5S1SJ^Saf zBu2%Qv@(}JACa78_*8^H4_IuU-fj^L{AVr5|4Y_%EBzI2p8s>-joiO+{rTs!AE0ew zLeEDS6I@|<8sTL@Vuo6A5#WZ0CrrpZ`x^c~zhq>&+WwvAqXRDt2$)zL9aRfyeRX=H zU^rd~`;z0%(g55qy<-LLTGRc!KWLqvYA(;2nEvN@f3AP4{0fMtApnx^|Kf0jUUZ z?3P|ImzJ47R4}9WXj7B|z#8liO_B4jm>&TtSGxk=ou{w0A2P4U+s=M?61@z|B(E0z zopEzcU`0w&zQYR5Ty!7y(HCNxkPm5*)E8g7g`7PshKK#O6$hu+w8>k9q4{Jc1{n;H zN^k4>R`eOVPzF#?P>A;8=+}ml!!V|ydF7~wLRjuOhOuyOwlL6AJpR$x=-G_ORHM@5 z{}TFDOE2WXh!VA{UBHA7ImsrC3FO5fa{eqeTJ8Zx}p-6NVgA!wpf23+*w~VZv;>VO5@typO<&YABLY3O`Zs|+hz{OHyMD0## z{xb;zOK-|+|J@bOK98|t%wUCOsREaD!Hr zjkN<|G!2eWD#0J=9T?(7E;P|~fd=;+9shl>NiIpHpBrGm+IG6}@xeV&(cThCDqd}7 zDb>5`t*jlgG|-#3_+vTFMDHRBOoZ)U<`KPy$^zqDyP=_*d1{+byUP4U35bu!%$W+xC#hQ!CE3teLG;Wx-W#p-#d z(>dFl9NP;MqmYKoDlRIzhD$D#BCJd}ACT^f^_&N0bScnL2w|D>65ae9QS%^3lKn3* z|7#&(@GKw7;G}&sdBVCaP8KRDCl~e!TIL9nH~BVYiSPoFaVqx~kB2RDX7GZz6i&+r z>np^;Gl)VuTEd{8yue5PzIdlA)k&?9Nw}`6h%PR@GAWiKEtu9rl~vB=5Y z(T1+wB*$#%4+zlxXGP@nSm)naGhkbuzTIwi|-YebeNQal5nbw-*B$G>$lR1ii@zz}29 zj)6#%ur!1QmTkHJN)+{yS_q`mVcd5h5a;MO&mt+$Mwrk-&Y*->?MQm3Ps zAh$`Jt-qFnB9ej^-P5O|*g0***Xh9=^@Fy8HN5Sec>;D+x1iQh?o(3s zdRiF>%&bWi*y3_`7B5P?gt12i1Ygsx%2N>_JWu&h9g2`q{`pq~W=T|JR=(@9ai81u zBolBBNPXV!C6x=7sO3B_&gHcW&aY-@kuXTF=hey1KebNOXO_ zx0aAFk{B4ATVqvN&45Q&*XWBmv||Yy1gHv}j=$69Q~6JzFBipDvb$Js?#wdAXDu%9 zUgUWTs~|%E#p#PppMw?Eo2?j?6NLW)47z7U`}0181nmzM zVulJ-l%A>j)O)Pv70yRfLD9NsKWchsO&5nb-8Q^tZX#U8o~T7NFG(GlnT?vY&u*o? zIBDzX7&1g9*^9|(thjVe8R0hBha^t%RhF)LEPu^7I50OG6&G10;ihItLEq#?pZUB5 zN4898^Ado|gij-rlsC-Ig?7kM(cpiuM7OtLoR;>;*78i^on3bpx_;{p#et#cwSU$f z`o+dhegq}Dv#?XJ+3%Oxex4uTc3h*Xsj)veqDWq)gtE zIOfj}=1qKQ^mMe~H&fs)jZAaLzU27q>kG8*zmXu}P(t(xtt;1Lxr%k2IA?VEW%N0W zpNgsz#xE>(ojQjccU&L-%QT^&-2I{a+R*63X?PE_n{;pV6?xOaZ)5P7)EbZAQ!@j` zi3}we4VSB7LEfURk?kb~EVa5sC1MA{+0UPU@9S-++YP-}hud~_FO;8xSSo_zbnkV5Cu;Bc?!mrnJb*FVnB{)767E1Wa9qmnq zI6FJTl&)iQ>W`zNnsIea$VKdd=dM@YdcFsK?;MUfwv6OEf;+`>7FH zk7GjJ? 
zB^e!;ajvzc1y8B_3DI+#!%_yPHE%*oeK#wyB^j>5&30SW{KGQay)s9xg~jq(E1siA z9-j|X4#6Q!{f$*nvR~gcbJfk!JV{^O?h~d*`m`0}s$(TY@6hwTI}+dp?GHE-^Gv|F z31{kxi-+ul7QWc$!cl^vrx$C}7hxt-W_?QT{J_}SPO`Ipr&JVI?Vcp9=XT_!T2ta* zEb#D+olZA|sk({Hn>81DVa!Kmo( z0QLg%Ei8LLK8jJRy4p+`#9-{w}SQXv!6Y`}m88gjr+T4}Z*TG$rpR z^S(2F#-noYI;!il|FDJRo&N(ZN}?*;U_5({QQc)pajv7aSHqYn9uMFC7n=U@`wC&r zcTmS*E;Zw;e?CJw_Pe7iD>WsPQBhHVGls8FDT9YWWt@XvD14)WKL6{Nd5n>Xjj!0f z!^43L5EvJ@I5|68+R{cYnyv7t-66V0S3~a@X`^}jzodf&2>QZsf~gHUtMZ8#ylvilNXNc?>wAahAb(h(gU z9qtDi?*7kV9D~Rl(D@bdk>7)Ib+755=2Z8yG?jYFNvqucg$8t1q;G%G#)M1N_=x=& zu5x?Ry*3rQjT4DeIhFmBwYqmA8DpoEA+5y=_U;)iW0HLMx1)kLxxbGcuh{P|j-4Kl z;mcQWALUzzhfjRB9uU%;_p<0`W&Lb)N<4gD0R!oH>pf3~E3LPxDk~My7nb%I!I;u2 zSRgxc{^c?4(a(ba<+Rtm26ribb#L`_Ys_H&MvU87PCnfnEBM=!UkCk2eiY@z*E2`0PE24=H{dsPt3odgRTLH2FrnAv&H`0NJ}!{sz!_TC6I4+bf8#& zpWu4rP0L)AzFUZ$ogK(Ol%r!@CJ|}I*<$d9h~W=hZ>{`kw;fsU>Zx|lpN z_Zs1!_AdbVKw{TgRw}cFs(c}ihJhYSP!c=6+xC4sm8YIsI*Y!o)-VH&?Dpg&K>e14^x?!xzF)S6T~R z`}(v|lo*EslLR0o2XP}HVwWCp?d5hhezar5)yJKkW<#TngXU-O+NCExa{BJ4AouR- z7e^~U`ZPbb#Ko0A52@ahh{W(=aUVh0OzIBoEBj2`lCMQNz3nfxZ#_T0`seddL@;u7 z`wTk2kB+XRg6W$b>{QJUl}fiSBbNGf0Q(+ol?4O506?{L3dts}(_bI$+X9qWJuM6VQJ$M>=h`mt za-?6tbv1YN7lZgko7JsK!n#C^TDMgOc$S6!x#w`!_=7vZRHarn&9+Va&TdEC>TR2n zrC-l&)&S*NzYceGv3$SNAf8ZRf8bId`;-uhMS0Gl@?z%sg2h~3f`ZEG8r z=Js0guTq-;UO&>Y`7yXlp{~?;Wb*@tD!Vb3{ejC@V@mrJk+>kM&b{p2$q=A4qU%#Z zmWQPJ{-adruJLQpFUd6odm>+7PY-+|5^k-gj6B~=TG;Vk-R-&Z8^C)$65G&p z?yLf^0HxG^bHr5T^;P9F9O%NYhV_T-V|uIpjd9ic;F)9D?ryy0bbWV){T=PZ0$THpgS$`6l9B|# zOeJ4meB(hbG;dSoX&#`fXliQOQ_b>}=tDG;5O**z5R95es}+WvO>YV0;8~vHDR7)K zF&@{DP*+3a4{QmW#6O`;2)*kM;gS$WjDs@!cZoRprCX+pnVKhn=Vkr)F+FB!Lt0*& z8bmaCbWY*vG`P>_{9Pz2CAOlZRN{+D1mhksr$<@3D$A z)m3E4#j&vqkf)kF?_;VnSH5cQ1~xtHQaKH=X?$cF%}i@acjCg&l@Z%p@=bln@cW6` zFwYD1TSynKP}0)EFDC~~EyH~dOFv#Rya34PF|}Ag(>7p>L9L;AU6Z`UD571@_-JQP zD7%!->Ty#VG6wA}c83K6uOF~!ff!Z8EY9tW@-|(OT*DM$cEYXs$s_BPUmXo)u3X9A zu=+K7(v6O8qH_CA7gQKCw3I?@3#%M3TJL4HvaAn#CZZG2*SspZ%ce$rgl^6~(NcCE zwx!Ll0pWt2wav$}n){@Ae5850sHn#-NLb^6t!<*Eb+Ie%RScxm=dv!H<#3ade19gF zeiofZ+s~OU)+7(ij%E*GJ&}_6a~@IDWHFd!FNm-3wDVA$gFnT!Qx?!ymtCT zUH_fuhBj*rY@D!|*^QmgeR`LEw_=0a^^z=4eElrDaej~s&sbJlPF``ZDgSb~-lb=W z7{-%dNJqLpa5XM0>~{M4({?`D&^!&%E&A{^0Cc4De`K;Eiep6)wu2%dU`>-oHJ-@s2T)~dNknv1 zat(y199C2Ba`%%XjLisI&eQ+_t&_iGH4fS9Mxt~FE(-NEL4pwvH>>`^z?Ahl`b!_i?ogHEQG)wA8TxdWRx9d{Js}!P$m{r_Y#L zBy+xy{&T_r_>oU+oEg?6VW0hK_WBcG?f7QRmY*{{XE$5aVRAKm@1q(^OkC%D^vPV= z_i3VU=_*S8Y4YbqxMn4y@a-%id~sS<^6X; zz#IY}gqnC&eYtuW zEK4I3NE^Ig!X`|GBTNxKVXs}NK!uZ%O{*9M!iME63oJBm1ANGNsjYQ2D?L+1G!Lp} zQ%r6~kcSI_$_;PTQj(8tSt@E8MimREnS@NWPFiOFl97vKo>@A&a6@|HaYi+alHikS z@)v1jBGBe|Id7vRg;Oir%c_c?o+^phF?i0?UIb}TZP74tCEtut<`(;mMRsjb^iSkM zaN+l=v$I7$v`D*UAC>dxhqi#>cV4xmr=^+;fXEPNmK^@aKgPC>>vkPDSc$)Vem1&Q z!!;Cff9p3A#R4;$KX!WL{b=H6U*gbHpgLUGLV>{;Xt`s2pf(1}QE7ZM##(~$D0EqW zLn4-Yb8T}5)5sZ7ULZpW6r^{JD7aJ3pjf2=WC}u71^a?99kL zOJ=#eRFT5zWlhlL8OUyF>JsbyM4ur|-Ci`KEHgYGHms?NF#VER)i4ohBT~AHHYqv@ zq0!LLvioO-iC9{HvKGIK*dQCgt$#~M*~Ig`Xh}468NZ&nrKvwymPSsPFq-N8je{(P z<lYBjmEEQ?KBBM3rU(Zf}%_5#QYkyKYD20Nf0r?Gq1MUHk={^?PaBbXYyq;%7P3Y= z46=$@?hq5^c1H1Yu`5!)Oui*nn7UX+E>uPi24WPSWN5Y5ttp~q2@O)Y-WlJ1XRP?* z1@}FiEy-I-{}`+6&=jgN-_plow9;*S+lrgHJk~MqCK-D!O@>%&&R?+0<}H&;nXfV< z;0Nt3G6fj5=4e6aFU+wwF^eHF6^rj@Ow|?D;yLL>F9@D!8h0t{)rgltkM_zpy<|AA z+5i8%DnE7?Xpt1&soJ-#&0gTe9F?Y=wMHOV4M?)=3}?P()pcbe_>{m)b{jxT0=kWA#+^f06XOj{cZA78ujM}u;3W%Ww)9AM z;K*BKzn281)_i24s0p_^YG0*38X(zCuaC`~_)$bhc>jB)H=q0zUuq98*^BTj0-xIf z&9ZX1UBpo;)Cdl|fF3akax5Rsca9*)b5VFkFQJl6B@D7Hzfw9{A`C{>yR6OP^_I(! 
zKJKgV<4w_sWJn0GUP%=Azur9lKfQ)z5P>#pN%WI0)(%k$?KqaVG|AO!=E}-6u)T=z)^fz&@ zEmrQ9D2-6ZAnaveTCt-BRJ`@Mss0g~TNxc1sfo5bg23lwLB6iR z)T0Oz@XBkRwYj-Lr@Ba|asG8iGp>CF(i!p-ej*p9|G>5kQOMU^6nV%np6Rg9e-h_S zt?qPMX=!O{>MM>Jg|mN4hEydRb6jK>5QQ6chxx&fjIsY-~P@XcBK91(AeM#&t}X z@!X*ykoU0i@+8rSfh=J4QX4kre2)6@LY7R%*(pbchLo2P*IvIbo*O9`?=@p}-x6@& z`d_J0G`ubDll72!TVrEdgXpj! zcS<26BO_v40=X2(Z=UB)(J(rA8v}4vG_&&)xvEQ3171V4V_|kWGqcQBhKc(JIk7Ej zfgC88^b$a1kDnX4wlg(q|TsGP$CwfNjoghrQ5(5%{=u!gN$g#GG*T^9Ho>|*E08s?EOTkTdWir4E(hLyo zBU4GnV{bCbZGYL?{%im;0OBa19ZG<=giD6~eZzqRcntYP*BphinW3e)tH6({H%Y8u zp5_#6br~Ps%yHWV(&m}o7gE^bck|Vhou?;&{DAyiF(#s|^DIZZ1`H|?R)*6<`L@VZ zIgkzN`q}w)kYZFChvzrodKLE3Z_%TJJ(2BtaqfD*^2{rXiw}2#8Cg$%WbVYtyCO3v zF)?@6nN|}{cLdxImwj2zc3~i(8=IMKZ)Q2gxg_x8ABs zP1Od~wcvw@>*p325)`y9t_O>R_5JlCv;u=UQ2WBIyc6VWSCH*P9kxpZ6 zcWlI?Sx%C!8)K`f>eaj zkbzW^4Qon%(4MK1Rt|43Yc2$UaZtP;?<_)&13gm9Hfw|)rAAVw?UlHpqp?@qE?n_UAPT2AaGYa)06#Nr`+S0qM zjBN-F7Alq>&H$Gz{rR&ED4Y@j9ReMpQhRm}k|)~<-WPX)xG_EwH^*bKiXB+Q1IRiW zH{VlAEG_~w$#M(YE3^F(9Q<71?j@=ZPvSxt2PYxZy#I}Lg9mt?JWV&|_c?5)yo8#T zUGb*0K#)E1lWhnx`& zAwhm`zlF617X<4}fyv3>f$>9HO@pqU38+EeXBkP}Ht;sdkWf4DwMO8;gfKlGL~kV# zWmo<$&T32*L3N0XQ@MqO) z>0&KL-i+vjo9yze?{}&#D^2?E$QQmD51si;Cou`&8UiKH26Uk4QRN-(_n!IVs8=+R zT7eJ4g$C_7Nc02!4q^Tr464IHbL1dVj5;g2D3U{iAVo2XlO!ZNJCM1M{V84mG(Rz5 z6;_8H16i_Y_E=~|bO6QVA`+nGRD5TH)B+5Dv5FyV529Kp zg)(|0EQW6?fT9jr$ne!Osr1MHN8MY7Ww{1zqlgPo0qGDG3=ruS2?^;Eq`SMN^8rOs zKt)Mu0cimN=~8KsE|FSvgLK0;H{g2TZ~xoJ{@ixp?ob0U937 z+)+VppKwy#KI+RQ)Tkp#P!-f1EcRT~N{P1{tiUVvld<{rWo#8M)`6z2)@WjLA3HT~ zL#PjUnUwcY=Gd>$=dcygJr z`Ke9FAXZ^kLBYYM-tnfM&vyCfaA@(YUrL1>@MKZ3)+g!rVhb=;8=RnB5Qw6plD)=> zLr)0GB!!BP)h^7U!$@AqCQJ;|#O^SPcWw<<&`YxKQ+X7E}60*N4< zVgSw`NdRzAK!uog4fiX+MtS|(*+~mVwYRl(sSU|daSS|7CU-hzpTr^}o zfVJ!9=S8^Xm0=Z+B^$^IfeoO=qf99V^E9)M?s~-uAJ50w*x24);q#16EF1^Ek$lk( z(in7ZhT34KAy=_l()RW(Sa;>|97q&HBwnz8xnpbQ^CB>gbu-iFA`uS=WF|DZK-lun z)b;A`>6tPG3go^jRgAP2QM@?{^CYMDO1#mma&mKXPs6C20CFo(etULSh(`L4cT026 z$g=IsQ*TZ#n!XGsO%z&+t9kuYeMFl1b&++rb)yAa-%~zMSX8U|*i4Qu%!W$a8ah zAFpKRUi)|-gg>0#xw#OQ%?vd6?P{Ef!_7@^W#Lv^lDKc8`-EGLtG`5r&vlw3EbKxpV2hz#6gIOVhy zC@j2KpLDSL)}0lnF`J(*X&QS0kl2~pC>he(yfZl1ro%$ zJ8qm7a%+x*yg<>A|FCPz;_j5+3VvO%%oAJlXbJsnJ!;ge5L`Rrb zsKy*+#8K3y$Cs}`rC6kD@K?b6r^reYxHOOd(9dp<=G>fEY6j_Z`WFzx>l%?!RaC#=#7 za?ZPfj4hh+jusGGP~L;1ake_~>WsfoCWf9fkAItJ6frUo2Bc>O3gG(!mb{O>ClRp^ zo`>|G|CHA`u1miQK@U#KgB^`N1HxjjA8HgrbfiJ(qpa z_vr#dnvAKX=nro}bqxeTIG7pvm|4U8)EWEw`uw%d<-m(%Fh>-=xHaFF(2GSz*~vUJrwZCDH=S2^%o=>Y z^F=+_>@r($^$t*V+_XS)umFRQOfe*P?oNYRB4njkkXWt6^=L5RU_5f;_xAjZckWT* zAfne5FdzAHN;H|+Gk$Tao7nrAaPY*YMdlR)|C6e>YVqqQ>)E0I*RS+UOMc3RxM#G? zu<#7?K*wHBzKj{hgMbUx*FrbmJxUWv#9t;?<_F<~Ivk?idm>T#>)iHnfLc;-5vs3< zSYP9D%2LhK1Bt}KMLYt@yHs@(n$-(3Rcd;rUihcN{e+z#ya^M|048bPBn)`P02DqHA!w+AKAgn5h6w`P z6+4hs^OuU=>V%l}PdVpj=!yvyoL(yzBuzNvI0ys&{QRUyH#i6KdJ&Qf;GG#;pyx%N z`s+PDQ20U&vsw-mC=&?UXQ1Zq1f%`-Cx-G)z;5i&R39qkSf|y)5vhP-9yyC8cEl%0 zXBvc_cQ<@$+d??Xe)!CUkeKdPboo#*sD2M4dY3WR38E~T=X6O=T#Oc7D9XuA@<=} zNHh(XSVofMN5cc1d25yOy<;6?HT-Y+To(V$JI)IEywQbGKU(x&2iM$fM1!8N}53v0iWj@~l+P+l#Et@#Qq(dpp z1V%lGd(=&a9j|%|%5J)#Tp_H0RQ`X`05L-!4dK1ilbcAX^n&4?Zslx+%3h7W+S-4z zZ!o8@D6(AB{7%ZR5dnpI>JcP3n2=r0hMHAYawzfBmgwe0xxHrN992b@_04c;jR-DI z&RQ8oGtUn>3tu1J3GH;&(dG?nz~0birftDfu~P7b{!QP5gyl++bi%YGwV@SWay5hb zN@ixgVcs)2TQmmDY!j8%z2>KoHV1YQlKvGe%0`H?x1gY$x!N2OgPe&M^fV{zK6sWW zfj}XyARi>CvrDHbbwqWV3;J2pCC_g6xS7FnNDCZh^+QlrV!P)8aATV)N6$~1%h7o! 
zx$0=zhsyFO>gGLGV9Y_gv8;F9_9!Y3eh_zk6eJ5b-yD@Lep(871-_E1cg_*;G3Xb7 z=mLh>1hNzp7ka%s+qrcr<_1TE_0rOB`yqbh!~~2}OEnv_a}otGwq#}oBDo*v2WHF; z1} zud3oX)`6^=8AO7V!Db+rSX~EIGlV^JH2`zqF;~0pUDKP%*vu}XP|wWqw)AGG%+?uQ z`?2lQy?{`}dzIzbE>G^3Z1XVifjj`nc_g9R4@c+}8JBu8y#(wBy-%gxXcY@;H3+ISmn!WPjmT5I;LknC!y-B#zO{crkBp^&!E>1Zasg;CDH% z@zv@TDA7XdC8ml~@|@@7$hsUEE};9W3{SJgL#B_iH*#KOF@TS!I`|i>rl#708ER1` zAO{xthaX89e|xCmu56bA*$(R{)6ISa)IXVk2b>vW)1Dplr_sHTVCDX%pLQzt$Az+N z8i4x)md-Y{RM=?XR`orXpXYy-{SK9rA=G)YYQ!|}CCU9CfBBeJ1BsE7Vn7+IcsEJ# zs}s3Uxoo3i9h?DT;TcTG!~g6Z^Ury76kw;-068fnjx@|twM^9HpimE>q)#g=*uC7K zA^=fio=`q!7|PeE#McV`!lLorRJg{=zuWGIX2MRo*j z^%CtWq+CGR-&)KyzG%-ex(9&Udoal6QZv?gU`g4-)n_9s+sOzFYj8cO2Rk=t3WcD- zUQp^}K#q~6!PJs8^F8vs%1a)rSp_*&_e)3aw~sDT2i6%v6^TFkg!5qwMdLFI(eomO z8XbBQ;0o!m>1Q*Z2I8487AZpw>Ey-mhDjXiN-jCaU6)ILJ^*q00#sy{F_&q#2Zf_7 z`-@T5mmxlmzkVI2sePDV$Tc_us=)C200fnAS7L(nx)XNaywBOTHA508AmL9R`XycF zn8WmyN>DmyJ_q$muHLbN31=@lq86zY%U<1=QOEw5`)`y@V$Gi$ww|2v1Hck*Tx!n6 zadq7m-nB6=71Q+jY<@q#lp_;jy_U$4z9IB;jf?s`zrar|OqahwW%RE9@dQZ~SPanZZ@0y+ z+bp=btT0o|$y}3K{4dLo$l2wj6!*kZqwml32q+Y$2lhb}f=;ediad|`1qd{%fn&nR zSKa%1z7`x7WVa!qmA3M`On`XCo3)eJ6!lCI+58A&;@izB_n6p{K3O=lHYG4gJ;EgB zolizbhrD3|Okf8r{KLKTn~swXH_mPF2HFbn{&*?qVV0^mqr?rH2hV5wpW+J(!`;4s z{73_3;bR_b56BK=n5adVwB*-*o-eb#8bk1L2!iT5z<_C>BVHZN{x@xcLazE)M3eLI zNRVmcnwmEZci%1N;{V`5%f|y-zrfSA*V(ym2VrFC`nFI#{UAd^RwZX!*UyKhEAY}m zDFa$;=@Jo@Y1Kc@S~ox|d9(VTYSezr?kiIA@T?NBIY9XZeVPS>1FjQRyc?_Qqiw%Z zM<5__d*iA$K6?JMA48-Z#pme!izi2iK;CZULsiKKW&wriz=loLFGNpXIQ&_t-noOs zxp#*lyX0_ryx1b@e%l9XsS1B?XxV4!X`-R3!bKXiipk+2I_x5v&&Y>$ivgo`OP-|J z-vD5bipt{vRW_L0VEu&OYll^^6v|{~9C)5kvt>4=;A=*}pd#z*CJ>fbL_^I~UneKi zwabG!q*G=D*~I^MS*^$gFh~qx7tMrO+Wv*g$}z^-D@GOe!B9^BQ9SwayuEm~IkBeJ zL==<~rD;ijnuPhXzx5U4?jQfv0-V$lws0pnaE5scTQvyvTAcV*Tv%Rr)EID_5 zKxg7{p%S&Sd@v|a#giMZxnurFnXKYtkoIWh!pNMX^5(tX{@_c6^fMp~>+g*kcJ;l* zc9P*H#mewh$g(m?7%pc>sMi5G*8Hf5Ovd$R5AVbGDFsVEw()|tYdSxa{}}jE>{bqC zlD|c2@7(y?$U!~A=}+Ek5MW~pooar6t#IdD{IE1$H}Rw;-yGu!W4=V-wAd|z-2ST< z{!vzuyAxW%p&}FV2?`P2q0s|mF7{~tEZmgp>gd2Xf<4)}uvu@&W@bhq_E=dZ<&9hA z$TOlfl3McQ)s5MpR10V&gF}Y7js&w^NS&8*8+{<8%dO?>ad2_z0to@hA}JYYK+wim zhZKj2xlgoyO=&b{&u1(yEPVVUv9cTrjz0JVKfqr&ZF}n15d%GSyPvP(!duv=fsxlya%W^YLN??4JeU_@7 zE$o^3Gz<&v0Dl0-&CP9>mN}!cTSXDXN1(0>Vl>7Q_tC5bSJYjXA&qq4LQ4T9F7=nC zKP&>}qJTDeYpzmx)eEeAM0e)~UKX^8r_4h!Xu4q4_o-bD6VTK_eaUy7Ef#XuRan3X z!vjc`2-%H)4RZE|kNbH#$xgEd70uxV&Jq(gnqn?AEo+U9lJ7~#joMlT9KhQ(Ze>HD zGPK^DI=!m&^+l-WjjlPTRjYSD8S;O4>DW_CIV`^ieG1{Iz?$nh5-6z2DBq5wbqr1$v*! z95pL5bYiU2I`WmZ3ahLXfPgcpZy-^GIL zU|t?UG5X)4yD7B);`5W^DpW%0^;tc^(DLs>E4zwS@%N#SLIRBLzCG;9cA53FFZYPs z1blBC1v;*s1&~i&#=(JBDyyI&uN(HIMS4frlLC(gW7bb7I^TxpRex!K9H33GuGfYp znHaWeLU}hMDI_(48iyK3=kc? zMtvTT&%=R-!gvb2|8rGu5nF!kGL?Y#pMRKI0KLCvlL~=mKgZgLJ|?t@x;LovmhKpX zR7im#^d8D1WQv!_g^!J7`!@-~#)ob~M2B|k!e?9-3ZAIX=tUpXiSA1Q&bFo*dkEDYq*Cc6AiNfYQOT|cVNr-&@# zFVw_hbS2Q1m)uBsRN7c$y8X41NmP`C&RsB+7SZcAZ|+lFO|bLU=?}8IM6dEa+8b%+ zf=>oUGpG()j=Yi%5naONq`vOlKoCQ9$rj`hT2t2FE1~U(25I!QZx>;S0?0+4q{=gS z)M|@7>M1?PT{1{Q;cuf4p~JUJAYD}R1|7{e+)N_^C}h_bh9W3W%;b0jZ8xr6j!BJrlxD%6hLMt||MH^mE;_UqK`OoNA6i8PW$z7- zy5H;`J)ly9=rq-b#dvRbjvp8$!S$^7kQwyOtx{Z4yQKR3ct@}5-Q0}DPqUSEuZE)u zyP4a2l(v{Bx#O`Y9iM}TsR`1KCokxSu?DD4cyt75r4*r-oA^a)FJwx_Cy#p^A;quw zAlHB#JCj(6*yIUbrbtSNNvR~YIqG_J4jqzmAPjh}o^sNaV9qI8Je`4X@tKA3Yl~=q zd*OgHI|jlm$25nw0hcJm*gb58{0LBA{ zoXkuUjrWqzM_|A5Kh>QA9F()*WnQjKUpMi+`>qtB^JwD1t5&?P=#{CLMlp0A&7+A{ zV=(3ZUcn+hE2oEteA>Va4ei(Cc7J_V=!XQ^g)2tm&kqzSoxT6K@%M;4)Q0Y7kTvUH zfEEj@ZrTx3T?{|WuW{(~Tl5!Q@&0(JDL=N!7^mjXbmv_c!x2!ry{S-*%*79GxMD;M zH8@0X#K|}{uVdWDztE9mq9Kl%c;QUdEyu)sR%azd68+)BT~t?Rg(7L)dM!y0|B-6! 
zr5D7bo_FPkt`lkw1U`SGfL-e)Q+J=O-Wpo64OGRnOw|^H4djg@9R*d#eA$CPtZ>|M zXupT@`(3gox`KVh@8S~$KI)0)O`IG1Vz$ErLil$1r6gaND*sem$e1E16zMfIwU=`4 zFXn&Sz?al_<*Mg6lSRZTyRv&i5Tz%kXEDEHr{dM=y3~2$`o{J<17xp+maXMX?~%T9 zQ;Oe_$0mCC{31VwS?aLuMGf?GmYds38QJ2)^8(&`v!ci1XqlXX!qU#Jf2a)*S1@_ZEddI-JKaD8$)DG_Yfyqd;O|eVPHTKBGHSe@Xu_{x z8~`2s*YAELd(PnW^z^$9eSUd*qb?Pe8=AkY*NGCy%h4$VQOCKh5h2!exbKL^F6n$s zY{%q0t+b!S%s1h&GMGm#?v;z@Vi>={qB`KiJMSS}@{r6N0vd{`sj0L_aa~^Nl(eg$ z&u9mm3Ljn7aO}RWqeR=r@49fiz<8i>!LS>q2lVvzX2?L#u1`4rZo5;F&~E(es>y-V zO#FD?bVvbhL1N&$2N%?4AFlaVm6YsI9j{VxS0z>t_U!fU)E2Dh(7bwgzVYkFEGhvnh-WJ`ug>&c~1ApNX{oJh>?kW(7tG#9tj0JH|NGTil_{s6R(dmTM)T5 zYGWD4`()bGV6OE?msb1Ww0J#d?+q$}J6j_H9jt-^8D3R4BbRM@DkMpiiAk_~z>```I|571UNeEK3?q$XEM>_ z{>`Mtdt_>0NsoOAouBgbHJV~t&z_s~AE-Kl$Lm_R{AyBT;}6@Wu{0c#Q9PMX5NAd$ zY}m%K((&JR7Zf>Z`OlD)$;BqPA~D=ejNb6|iBNct@_MD|KTFIb7|RV^KLik1?50`JYjxj(?BSVnLK;pb@o^GFPMLBZQ2PQRi@=?$5JI6ET$a7 z>7^Bz8aw&0j;v+Gc5{Bkr7YQT4cS))M#_U;MXp){GzW06Xa9eA=n)~p+F)$0t+ zvUj?>Arsb#H$M@C_d@j9l5V$a5QQ$&kXp;_ADXX*YP!(*Uq+{m+)t)#yX#>swnL(< zhFrw-4;trZ1`Xi}-dkNLHXOsE6*Z?}!%Bjjju+!1SV4GPEGg&cn~SQ{GblxwDGtp> zmWftM)Y}sF;VvC-ONinYb<;H&5M1@nFiY4b5VMa@AwPL z#;MPDk!a}Z*Wl+gqD-6e$-VDG0S4>W!ZPXIf{$fgj2ec1H#fcEYGRgW%ht2~ll;}h z`fwW!&a8bUeEjS88wJ zJ`gW)=gN8Z7DkPTh-S}qtBa2&ej!7$3qO#1K`ru3Xx&3Inf2MHhQdrsD#;JV8w z?9oU(DkFCByKgBU@bdCz9!<8eR4W%jr}TyS`OM5rUOqmh&jr~mswf_E=@8Thss!S= z()IUE8Qc-7Zwy`dJ|Rn%+HO|x_+Z`X>YmNMl^L_PH0o!UaiW<#DOZorR@V`1kt>%1 zhQy0x<$>GM)s648I;UZ@GE&KpGm8Pc^nT@QGYmBKICD;lOD{saR2JWe7ltxsf3nwu zwdoDJwPn{4b|SWevcSP1Egl!yau&(xhf;fYSYPc`uD!4{pKI+_klCP_K2ow8EX91 z6TmUA5QaxZRNxXCO9EFU(8G8O zxq9>Pu(VzGH`Voxn!+VXh@)zhCklEK8RL>dmrGNONK&5xQeY~e< zLDCJ&5PkCvN5b;Z?IUF@18Y`J$FY1jT5;nRFl8(E3N6{o>#|$*ba970M(!MWO)m+w zdUE)_eRrC?z2SQBXR?V**G<@wkIL(huvd}n96C`El^|h4v-D2ltWr0lMNt>$YuLa) zJIvMhx0d&Pn6+P3?~jZedrvi!Y|?^(DmUDmmmhaL4+~&*!tNirkJD;lG_p*@%4&D? z0`Z<(9n63XEwRISllD2oJ+&YHN8HuBZ|-QBo|x*MgjyDU_+JLDZZHR2di@90xXo8o zFF6aj{^UO!Zw@YzoYbf#Uq?lycYHhvS(_8t#Up`K3%JI#KS;`iMDWlWDT}Y?u5spf z_mLgdu7wu(mrX^rRPT&aq26O)2%|^5tsc}!>+XO!y&3BQY{D>LQ%6L|j{DsEPg{)jA8}x24m#FoE8W%c=nT@%3JH?rmr}kyOqgBeeyx2zkoIW2Jt(F+j^+j1=UI4n-?5Hu zH#8;BjYZ=^JO7RjJ6?)CM$w|vZpHjbf5Ia(@Qniw^6l{7!CW3XHaA?@$E}#pt>`Z( z-=O3;n?nU>{a?^)XV}i?st@$2IA#eZzsf@K2KDDV)_A^Fv90%?f1mzB`AmApW9GL? 
zkMXjxeJ^;ZD)p=5*rz{tJ%kY>L5!0_M7eRXhXP&*Fkxwbi%AIyd&t{#p#;ox}s~MwFp{_aSfyMPA)D$ zM{YWUu?qi=w2IuFV)3!aQ+Fe7#01!PvnrA5`gQNa&Hg%<$BzkcAAErYj674cjt4Z&W>LKB6zqnVO zS-f&=ZZVNTb>u7YbW;eBU!8KCA;h15dfnu0huop2EfL!S zCVRqVapr&ue`;483}pv9lnV%YS{z3Nu7SV*`7D3(nffdY!n;y_WD9UJOHz=ru~Xi4 znU2TP0YC(gwr7F&p=*q0g7=E0u(Js%G3_=%j+JnU?T6euf{BTG*c2W{hQnvgbp=ZR zHV(+_XW$z9f$xcVkpjQWk83OlZbVolIGputJ^20J`{7e?FlooReWbs5EE-%bEJ?iz zNT8s|y3BOW&CzNq!F>6~-eEPo@#?FUma3{f3e^m%3E3$vCeIh>VVK5BK=sc0__UoB zvy8~GI_E}x?XS~|z-c~(AFV#Cht(8oV@7})0V|GpjNEe^b2vB$*rxg1>I4q>d;(8^ zlV)YfG6k+EueGhGm`}a|5Ny>oS$uZCDUEMC9s46s;)elQ$AAnXGXze?>H*TMv?|Kg zi`CYqsX}Y^21g8-_Rdkg+5R3p*6OukK&;HKclzK!Prd<+oIn31tSb8;bjO!dOFE=S zE`;~kNK}qGARXMg$qM`cpuOnx09Qr7QRA4h2^j#A2opY(tt0Ejl8?LZxZpl0tyaBR zVFRC6ZX=MR6Hof5`DKM$yQT&fKh&kylt=`h&%Mn7KwZmr(Wgc7nZ3#ijdp=5YJl5( zmV|Rd@~!o0>?qj_r^6Gk2h2G_QeJSlvpreUzgC9JxH4 z+6Q=eU^@rC;l5Rw1s2z$%5&yhLX>Q(P97iF@!71^fE^U{|JvAHBLSEzIpf1FD3Iu- z+w*7s=L?zU1Pxg58;3Qs#6t^z%i_Jaf^MFrI55#XNQ4F7vlhOADh#xh`spZEbipcq3FnD@R-*Ewme_;I+ zU%vU+D~w)oH>2&%>wpB2)x_xx?zWDs@|+85KU7%EXDw!NgU&2zMDMQ_Qyt2Lj%+ruv?zy{uXTm+9V^CSY0$~)W$4+77}G1y;y zxI-|7127@*QTjn(KHyfR1U)DE0KC?aI)|g7HOW`EqaS5?Sw)qI0+=onbRDev4WI|` zf9)!~DjV-SfR|zC0|C0kM29c2?Cn)oP$)JRiEkelWu+(T#ig|!{ zs6R*Q;yF2d&l$!tu~%^`d2MH=r;%r#_n%!wo;k$bw58a2#W^{)OmXJ0Dw32`c8v#* ze$TySP8p>i0f5zeT?Oz%QW|4*5`Wb;{XO#EnmQ>58VZ#?7^GKmEFM+ayrAp?=zI{! z0k-Qj8iUu?)fxLvBj1Zl;QgfUSAI92&k!jzbN>vXap%@B&5gH#p#={UCF+U1maO!Y zilFZt%nq7^T3@bIs;=yHK;vK*oK}O(EBZHve|Q(dstn9A5QAXghD7i1DuOhU|5;;b z=G0Rr=IW6@flvj;yGgl%)k%y}GpP8wTMm?SD&H@49cGwr&6kOk5ayJM8%UYOvFg5BieqO5N+LTSmU5FkF zLR+^sV`zf*blePJ%V;Q2qcXx(h`JS{QY0c6lVYrOvXk1l;uax#S?8{Ku^(ajwS2Qe zj&bhOZeYGZtF+ZG>YI14VwU!M6|q)(-?7t6vIz>%slF`JP^a(nY7SF5P!@(u{g^2K z%zxyIsT$t*$8JBnmUl_1Zz;Ff3a`9@5rKek+3r#?^2lpA~V( z^g@+@I#{-JgMx<^GpRl9UL}FMwTS84HMrky_&8VS_Fj-!`rYFPt_#T|ZE+>f0j^cg zE|9JNwivfTc_~u!2jX2P&f5QknpS3xTb3tbcwyBdY%^l+EdShI&cWgb7t_V7AVU29 zIyTlHMZ>uvE_CMIK8Z$Mk>%NG$-3DZCT9tO;`e=qw&AabJ6*gq^J4^qTmAJp{)CjV z9v8s?t{mH_S+f!g26tPJ=1RB4GxB&U{uJBNH|F$fhe>=X2T=zjxXdgpc1rX7QYC;y zAGoKx3%{j5r%{sI$EeYFpLs06U(Lubao=Z9ML{Pc>YRM+ZdCEUD0kkTS)t3^k@9<- zG3k-#TmWUbOc0R z7r|jdl+Q2-+etoM7l*~x=AKN%AQnY!g)DpooW`epb1>#celj|qt9D^AOf=1}<-zYp zH>LiQM;tgkwW`d&sU`uj?keWtvul-uJ<)oPBl`B&)Ps2wmKMma7s5v4AY;jziHxC2 zs2J>_e`pBqcpW=ZjP|p`Hy|-M39eS-D?7BvEQW!V2}oKU)n|bAZ6I|ib9#* z3QvHoJXS{dgDb0}gH~81Vf}2e`6@qvptEmYI?t)qsX;zveTGxU|5Zj{t;fxv&e^vN zf=b&e0Z~<9y`rDiXL@vWOq&$1RQ|?`X*&(DbIUZIj5-e&Dbhg zJ+Ue%(^7gJf&blQ!CkAWP|l<8EL5+~*OpJUyEMTgu3->$Ah~96 zPDbikxaX3R`>W>Q@w{@vdYd)$R}{ZUWVFx2B&rw6D2h>c1QPz z8M-^VJFXJ_oiz&=t*`z1ba8&OzofUT)XLP5Q=MfURH@Zm^+;d;yfxi^*_|6Rv;w>H z6Y{l1rIQ)i;iHp0H)LQKGSlde2a4y|!8&1${I3?ka6L*!+I;q@)7g^$W*>iLCww}h zSBbrT1*1>?$$BxwFxdi!hg1(@d9AIcxGzc?2TzV^4jn9|p?1zwlHJq!;lpDw`*ad0 zxYLIUf7gqh?sVUwjoq1qb>sImuf%6z!fxda)u%Xycf?PA&_Tk)ghrl7WBf;55E-*B zvB~alh&lF0k9Dmh!^2l!y|V$TRP2X;ybwm;+p}mxKOCq$fGa6m?{I+~wIz)Ju(8Vd z?ht+y1NXsezyG2VclTW=k>hlI0wd}mDq&(_nSrPT_*jJP&0X))vZM2J2G9YchF*yq zUxc6%f3!nhR#v;rNy^%8u;>l&(zo+Xve?+z600Ac2%jC)+}+(hJy6vA(_oYI3h8bwO zbxG)L6zpX0k7K7?FEE?Hq&WJ(K2NCpDmZoyY$ALIEH|LGx^rMc%-cTl2#$q@+76e6 zXADL!&k}Yz0P+QBc5n_%N%J|dKv}MGarf7O1_px!tZsl2C)TTVpE5`JkoDU$2;tW* zc+9U<_bKtE!Fwi|a2R^{7A69=ZjB?2S7Rg&J+2gdM;nmZP|P@1B3mz-=pB5kX2yJc zt7;L*2`ly}KzN0c${e*xQoH9UIZ`i8ti=E zH$no6oRkT?gQzBBLcL>4uV*OE+a<@zR8~o=M=^q`%lqGd<%$W__t%1ng*Z%drhVxY zQ?JSh0e}5R-`INQP0*p|7^>@i&-+*XxR$F{@iZs(boq@oHIN{T}pvH*j zA6q4cEZLjxC2iVA7-NCf_~l1P4ud)0Bm*ck#kaB1(YFkc48~{C2Su_4_!rQ0Ug`L7 zyBrQf*w#txQk3gsYy5d~QL5nUhPVZs0zk?9`DJiMV0yshMtEGKZUM@vEm%IYyeXpE 
z`OA*Nde=Dq2|!fdo(a62;9f58HSBN{TXO5MkukQ?Hgb#Kf7ZzWgK{hO6wq#t{L;UB zk32tCFcK)yAI=a^c3&4%p}L)H*C1YtDWa7{b(H*AK?h7)-go?`er|@btwj>$PkP5|oBFYjP2-yTUPe}K?#jw>O>DJY<)*5cSsHU7GDg9g zx;4lI%vQd}7=zE=&u_V$7!bJ66eXjeoH??U2%-#ZhaSdo^S8&A{BP>2AyTv>%^!O7 zmf`MQNjJZCoDH(7wfyNCSdK(*dk8pJQ;V*>34Wd>$;yZ^*?j#w1?-vQ&X#QQ<3<&r z77U=&@nXE(IHGQ!^UeNyI9wAiOZW98By>HAEpV# zF~Oy)v7q4ytpH?275co;$p1joa;-7@K0gY%7{^((l>0`m^ z6M1>NbA@^NvlYVfHEm)aY(RT8BmwJY{OTS^NJQvhmRU+7R^KaY4GKommd<2}@Cj%# z`r$QV%N1m0Wz&xZC-d{!?--%olZEE+bxk{U*xS9N%4!fR-qz($Sf1_r%(d2|cRc{t zzn0<%>=w9il-L055Ji3;@Z}o$Ly&=L6Xa%h0ghH&^)|bSlan&h+wSh}E4jnK9Jg<6 zwriJIvqw!g(55|#eY^rGjWf^%>5AzeQxLi@HB?9L?Ls8QKHN#u#)7U}d(Qy}Sn5}H zv!m`aaj(f@{VByECo#w4v5~%KGXS0bkiLu+{Jn{(a3eyOtt>1a<1resW9lo#@_6r6 zFYd8KGaS#f9`n1;D~ksfOfeHmR`wbQsT33xZ5%HavXey4xPxO!yN zm{U;>1;%uTkyQ}czFLo=k(s}4-}rEUd&SDql5olcDwTnAcg#C1A3c28KBML79z-!O z)Ap$OWpLBWU`WUSrQN$RFR<>C0JuDR#zsa*pTq=K6u@~U!DD!4KS^7_K!8ENRgqF{ zw)G{pP_aa}lJ`CjPw7_q9qspYfT*L|&E9&kyxFiMpYp0~8$6Tk(J9!zsb@e1NTz=8O`yAO(MHFjB(1@7HT zjgLpx8S6)&0uqik(&qO~yBy#HG+*6!Oj>W?(~uCtsh~(^9ZL&~0P$pxGDM7ZLj+>U zs`a>)^!80xpKX{6rP9NK{FV?|V?;JI9t)pV8IXx&?(OY`>XzAQq{81<)*O8Gr{>F) z{Eds)$Pc4kHyF-WA+%ym-^YMxOO*7@6hZ>&{{y)isBQvT)Vyc*=g*6Cb7Q;bJ=g?S zn+~ZC)s=qjHU+!;{vzgx^dZ2mOfFqJsHFw|?k0jC%8zoleLgYW1vi5~kl!05{{coX zde88U&cPUBJGCCaIob_$;h;{`y4rLQo!6HP;x8{mls!5K#_wZY%3pE z3^+X0Ig5LGc+Ahw)6>(BE1AF$&ubJE`Ra^4*L@k;U?UcMak4djjrGjhzBpqO{A4hC z8N!#E&%W1q&7_qDO;y1~?$DV753J{N=p5Rltf;71`1&ofsAhX+^C7A6ypDyA&fuYg zkZb&v*?!{PqGa6xQqSTr?>MX=+&`l031od$pR3w_PU+n}?^;y#XOvNJ_iFt4e9<*~ z?{Su+ijv(yeg^BZY!;I@0obfmYRz1Tgs z#p>99=<(&vIM_59BQ(Pq?{#$XKbNo*C7+0JP}tAGX$uc2wpH zRQCq^bG;7x*6+1$NC#K$L85SNctBvuYV(&qxS4=DWG^&tt=Jt3C}EVIa=}1J2>{;= zVF<*Gq?74)sDVkP6`(fsON4-FeIghqhLkEZ*%paHH{p}j&X|Mw=+S4X`i=J>OLSPm zJ)!o-UQC?YdX~1nfx{U^;XxkJmTv<4LWEvJAxR=EGID!k1C05>!Hi&TZm#@94BWZz zBV|=p)QacmRVpef3X06EtckpH1N8&H)UPxQ{Q59ph^iWLE@W_#0Ge=j_2Sez29F1@ zn*lnf%;rUBERCu{YvH~vJ{Iw0AQ+kx^fnGHfvj5HWFnaY#2B;J3&d1BXcc%7PO%N7 z5+*l8<+kO$y`>o`@7oIv95t0XUSa$Vc~;&h_4x-wtsy2i_gL!z!=1u7;9HfvX8#4I zn1&t_^Fb??j|z#?eQ+9q$RkVYgd|+qHR~BuILY#?^x^z1kA(k)Q00Y83&VeG>W!fI z{Oay#=ed9-I$n+Ri5kdENql*ZjmT(PgE0ac)tNiNAHe0-lfS(I34db+J!CHxTL_Nt z>}3WX&n{zjpj61X_`S50o!Y>5k9O^so^p%|{xL7!eOcnL!O z@E5C0MB34OBM{1Y-xbEHBupbU=H?-YM~+2hNvg-fhjg|I*9CaF$ zXyme!L_atJ(RaZ40zGIA-sf7V-eWMwdT-=EMEEH|x|dUibe-ya{*HoUUWUfNQBv-4 zOI9LcmaUvpvJijz=JkX95tS!JfLHV`=SyNibc6}!_m%YkFPH)67Kg3NCmF2)7{53V zGScXz#o=-_*uWssVpklp*(YRqi+>fzCL84z@N8bXz!0Ebtqw&tZWBld}92(%{O$MQmEqfB?uQXS0trjf%n+4_ITVDa6_r)iQ5hZpszl zW{&`7^kcKl4J(42=UL3P+#9fwbE}WaexbncLInCZrRBREu+0@ZROesW%mbO>0@hXD zTf2Sior7G1{dzqLf`k1mW&XW~wy=*OXaaiAN_AH1y@ny(>gWHmOZT)nBsmtpeWn<_ zVOkth>c&%@B>Q~oU}v&Vf(R49I0PTfUH2_ghCMug`KC{zTE`z0;b6IkWwfjy; z2|3|SRS|;j2Pw{*rwRkYmM9^KXJjKGE6Oph4?dA{*j;^hfD;mGWO)pt;0-+qox+{#Ko1Ue3TS)c(??3xY_F$ zU3l(5d?t^QExC0S@(q+;e-@}Q5)-k;qcToM&Apxmzp|cChF+V)!hFm=tV#DDx5oMc z@@HT$d$3+H7l2S}>bRWq;X_%Lt%gdxewL`KSN7K|J>tyeDJ|od77{cl9e{V`aUREc zl8vpWeUx&CqI&1$%e?J>{d}sglO@spMjbSQvA%GTO@25TTbSuh8bN2tP?lEp*OIq| zvd%&Q?Ei`jcrj*Y%!9ax+2jV~#>_r|)hSjYAE0wDL9OCdQ)|D+gU4cdv53yoUS0f&47R ztgoO|wYrHU4wt=AYet(27vBIo55c!oCP-j_160-Q6piwda)X>#^!YPFc6Ys0iz;JV zU~3*O?s2;QU9n^R{-Yc1i12s+$~}NV^Ups-%M}9J-*02aD}%b_BUJayIE+^J#fwn34|CU zI91cm|55*&*8fta8r^oe@gZy@Fphy(7{>Sm|4WrX_W$(@HG%fimQzL^tXm5Z(sC77 z;m6&&_JuBK^w_E(MbvQ$%cy$V;s!Rt*npzpmCmu6oD|r%=Vyeiz5;LwaJy4;w`KTu z{vCBE?!X54`KPxb!E1AsmY?AU$=@3VH;*=9_}3B?fHQY&fi)ApG=uWFjq+#HkK^C0 zp+f#kZ}&cy5Ue_$4zAk@J?^&IZPJ*h)Ii`Tan%|-LY$n5KYb-S{QMV%IUh0_0R^6KAS||5sl$&kUT6>zYnrsd=?6JMIksDJKq4=b~&p{zXO!zg-{0{+u~d|`)x3mBv>r6>P=_Kx^$Q89|;Qq5oQh 
z(U`Od-hWHhp?h}rt~{y6_$+%orqk0Crsn(=pd?T45&r&v-1GnS0RDgb#k-Ve-}QC< z?_awd|99&G1B8S2MHo5<-`SUFJzZ~TznqX=f3(w0xyX4yTH?2LkzBP{LOa>zm)w=)5AR=dxqmI6(M-hP?5SEK z>*!sI^*xwXB@~jMVaD!Msts+TtX62qaf%8HO@xJygmfcF#QvUV0C~4xs*E*Mr}bfK zZqL2b1NnYNSmQ5MUIKbBz&5gR`kejQ8D0m#p4PvPfqpKbowjYoq4dKs{^_@bif+Pw zRA-szA*YdoRtd^51!&+#-L@Lc=Nnuasq}OmVSiaOw!Z7Ojf9Fq%S|&;=xBWbgG;KA zaHkBWmRficCv*c}msG>Fco3@VyrWu8m8ha1v;7BWH32#cy;`pgHQ~X(fD9iN%hS_t zgJzi7ebi#k_pg1|SFm<5j$i161`|Ul6`F%<_a`KNcFu^&{$7G+$=|^*e3W1p|FXb@?FVcPS4K3tvlI^I3B7 zw6n9rEO4qaSzYM~8mWQC62T3j#uws-ZPoHknxzgtv5uW!XgPZKO$QcD7B*k+gwd7CPET*XFFN zA=DXIyjfarCgs|VPKT;_8^8;@vCiqpYU-7rYJdOp@}~mLY(r+cJm@6z5BfQ_?_qmd z(5>$DSZ zopW6}`{q=w!Nt(GSy`9ReBaaA^hsy?O)ufkuuFx&f@iPiCC7ov3jauCy*1(@YyQgg z`fix*{qo@|N=Mc`F9DN5(*kHfc=vpoXi|E4IUL?9M3;d{tf*@y5S}uPD~(ha%=(@F z3FDqCOJtytNlrclz!F6N>keqD`Z(_&Yq5bMtRSH)7Jn z8g)!Lm{5xm<6wqAs(GHlKIJe^($XXK`MGH3Ps_rQk|(zf%3#u-g(b^ah{aV65;WhT zj}<*3Uz`=vn#a{B3kR2)7j87WBP&a21u>@V8F_h+n`S?URcPFn8y)hPdD`QiJ6Ry@ z!B06w-5bc5evIaDihJ%)cnOI^b5vC&# zr;-+ghu?gVUXzbI^sp{k%v(sQ+g64aADhOnPwqW7@vADVZH|WAGCHX$G+N}gPd5|} zR9h~&Fw95^eE;`%#{LTWQz1FIR$ivKBmo=xCZJ>rNh# z2jNLIWB5$aHP6D@TDh2=!K8&;5#eHPY)`Jsof1Fl!fR)A1GAO7b-S zF2a=;G<>T3m*q zCK(cDEIy^rnqHBRHzLw>vWROCY3=8L(G|5|OX*)LMTDDLOHfKVENl@%4eg%b_3Ncl zI?=Q^0H^5cx-D&$WBnS}b539%Sz0!5Bdw5&KR8*4EA z7#+>2)v^$p-9U$EXIED%T?g0l;b@hrK0`L|D@_p=wl|xC4z8*qYhS+ru@7WqWLObQ z%*-yXuCxi&uJF`~i~FY>pm(0>9%Ay}allUSeL89KD5UPuy`YvU3QEc?kIMN4=(6^K zSMG;JLB3q{#hXfV(Ldc_7dymwYXlb0I`Nj5mvie!DI!nu7b{{ztj1T}6sa7zSXn<- zMR=&C@(zRl@GwlL4oZIz@s;s*n)KZ|=gyw~aB8|uF++BBu*Aq8q00zjY~-3J?xeta zo*|#2+edR-QA)2V|8tdX8AGk{7g&DCA75=Gi;QDT!eEzjUz#!NQ~7J@5Y7-&YgJPs zQ&<}jQetE>IS$zXA zQf@x?=W|`xS?}{c=jiEC?xRKztiYU%Ajy;*dk>~y*ay2GwCy3J8Y_txm-P_J;8QWK z(rnb(`t<43?CdQ3{X&bl?9FUtEAsMDC&{<;F|v%qghO5x_DZt)y+*1k?%RJGo>zdIv$GAv>Yi0tIPqR4q?95;Ps>` z)ZuZAy@)O`DS*QpltuDpecx>$S-H=+B6PdA@K!QXoZ-pfEG;7m?VC@*gx^=Pl#1po z$4H=+N;e|)HhWMwlx)p0TqOW&o}1*s3wYSu z3o|!`9jV5+FFx|2KRheEZiRsLyQ>I-CM~EBu3nsF60MY6UQ8)cbw-i?`+zc+Mjbn- z{clKs_T01&C_Br0mK+);zth89qr~%{L@F)t-v#dUWMK?pql~hEge=%I-aI@$fv))< z^7FTkJdpm9w#?Az@NhLKOK@8j98$V-Px+AdvHV;}{@g`=mD|vuHUBO47@JL5&A)Xf zq=ZN+8YGEMirNtQgY%ddfvyjp6Pq;imQj`Fb?Dl`O?n9#Gf*b{TWNp$I&GME^V_ZQ zb$60f^!&aXl&nj|tw`UoPuR>VYivxj?NWhUg9ffa(v#Z7QnDLr?}UG(#$Vkw;{6w# zf2EU(HUS-k8QcmdDBj5Esd!2^9NZ1(hJh8sgY*5W3%gjwPlmdS+b-`hkT)pa`u_>v z{)OdH$HX^px_);{xL=r<%ICy%s9MjN3~5jxe3H)OqK}a21=vEbB=Xz0zx(;=$*O;_ zbl?EF{_pj*?IObNcmSI)xe2|zsx+auD(szg@6xW(s+6!=G-cs5j|DbwQH zHB5riF|P`9hb`_L?2f)TIi&xhRr?6ZI7Tk-%Zvs2`QS!$ZTyTja&3_6f>=0D(}PFzqT3j&rBt!pB((b;eeIEop{14h;wVYZI^mH*OQd>1D-%0m0z zNCYVlwD0PAtcxNvH zEonWTkAZ3<5MDB&#C@Nik9YlxydUk?dkjYKA4LpY%xk+lJAR+vBAz8Db$==MaP^)) zJRYLAv9KVxLi0ss_Qu;-0X8#Pz^k!y^Ij}E3;pA)FB1&0?>}|3wBp?;T zAX%=gruGt<@4pl?SO>wE^VU=vZB}d}dPWF7`}XG|Oa<`BvoKdbV?-LQ`p@ODfh<0) zgy?1eEg87r1Yt2-qh+EelMs45c>!rjk8d$&&&2efbS*6`Kv>AXw1m>d(h_Go6gZF7 zSe_811JwwXM!&rok zh^AeE!SmlQai_r2gAZm`YL!%9I=gnbIjNUN}L=O>ga-n-XV<2!{K=~)v$I?nYyqgJo3Cwy(9(T*Z%ps}hKOQbF z>dJ}m4MlsqHi*{(=)dvqzti5bC|0zx+IaDD^@LBB!*$zY{;>Ja1>JI^2((J3>HSHrXDK@Z$poj(G6#(nIT=Z zr$F?3a-;@M50_o~th~xmaj5W|e6$_Z87tjc0Iu2avHe<@EuFTLZ%{f^8|8Z&Bzv>& z2VO`prF-$i3Cemt=X`;N9NdWPWu;u zxDU3sAAUBY70??>Gfl1t)w!P&Be!e9)Ygec?vzzbeG+Az7XHk4$E4*|%Tp7A#>!-g zyHfp68xLT)m2b-LmEEm73AkRwcM`QJzh4Hwjz(Yj zZT(GA=pPCTgCNKo$WJGbYPGI`wmLFzXs1MMw1|a2zd?Ex>aVD9!5N08ju}_JLm2-t zbztSy6YAM6uV&r&g}BIPx?D6p9Jesd?=3m<(Mdg3dPwroEmCTflqe%rT`X-J){-~W z$Bvh5m2rg-E@yZ%R^F0Z%Y)0@3lD=MQXI7j z?QNzm|82K(BnNpYX`+o_zCd?`aP^2%<4YHp(Yft_))~m95)VDqRV#JI`F zEN2s(sAe_8LH(Mc3k+_5PxaYn=*;xW2_oXF&|X_E&jEfLO!ZYuE@=+NLXP~AQmHjx zzO)~@g 
z9t*8*9i5!QV?!UtyT|x3uZ_O1dKIV(2Uy?-Sg1k)5;~S3{y+8Tg9mY->{l*aG}@JU z{1Xjka!TS)YCX-_AK9DC4WzvLLW@?O0M0dITMJflZ#SrQ2D1t0UT&7sO`)U47v~IX zz67%@j+wuDW_EmOr$*=!tD_qSAcSQW5jXoK5A(wI|KD6&8mj+%gl*-CPo)s-&`QC) zs%Jjq8{mm1Ke`5{Po{w|^a;3a0OFJ=%mUh@bH8Ss6a%62J0*t0*)CYAkJJJl^G`Q#!Z)*byOH%%SkM{9n zTbo$HdHjS)jUPHVILP3M1qxW>l-LRigv&ia|JT>oPc#^aLvJkLos1ZArtvuyG&VkW z_<%YzVV5Fd<=owyQXFjRJ%n%^bBudhgVUG+B z7FAlFhYYtK3CW*(o3FmYB%BTBjGm>#Vq#Kz!;*Iip%ZG@pz(O1J z9jBJiS>6H~gX2*=Z1QW9KrGR7=8Uh?1;ZBwleX9c=+)r)Ub?ss9nGiWF;FR!Qo zri}WH`&H*CiO&>}7wUOe-SLmP$eVmc^g!#ob^=oDK@Q##Xh2#4N?f!M9CGYn-cK9O z7EO21P|eNFqf;sBfZOwwAQBo?d~b|TNEjI!YJ0iD&CMNV3ThcJxC$QpP=?Qow!8ox zy4irM0I&t1s=UZgNWKC{-uQ-`Jv3#wpx+JY=dS~SP&MM8yWjEq_iuwBTyD3Up-(2P zYrw2uqGD5?j!_TIYiMw{v$F$DyLH(9G6ot|!S>|BHqoo)NG{1Iu*ojcn(Qti{nvJ?9b10BymWCWrSBM#TWo$rsqDQ-a2n&004o<}OBM7^P%&jaelqueQ zNf}j@cmWH3&3{%43jAwo)SmFlUnvAu6%@Yt{&z>8LckrIwANku3LpUs)cNV<4KN~` z;>swgeV1MqcFZs%{wX!JmHSYp>oi+~j_sTDCOapd+P5p5`dqnpqChMO zBECOy716xl+eZ=qQ}2#5wxdhC56L2vlW#9v)rZjExxo_-BM*kX|D?4%=Xms_^@vP6 z?KPh4sT;->W6?c}sHU^5V298pWqDzuMrGEUqt8){7x_L(X`!KWJro3;gW)nQ>DUWM zupgkbZI>cd<~Pw*Fbok`fYGrZ8XG4KIiZOQketHhK~J6sj(~4KBON&QQz9@@4mRAp z>cW~+KWcm|We@v4V+$>x0iBEa22MFFY%Q_9j=X%&moEW`d%;0LgX0jqfrtPT@O$vR zgwCx)P}88D)pLPwvS!Qs1&y?3oILai;BCr9k`OV0F|QoAOzex4RtdhHq>9zIBQ zuC<``J$CR5q6z`%M26*Mq`udT2Hj^13ky3NnT@}|3^d3gQMlhFpTwKZ_rKqH>;N>w z=xmCBpFcJC(e5luLw!WQo%iY>BgcEOJhYM21X9RI+Mx28FAoh0b38w@_;wy?{yJ}N zlv3OOw@6k9GghEJ`(PJVPw{5pR18h;|C+ToRQ*%n(_df`i@ z$qIc}if}JX6$%Q5Gz^8q`~-cd#1v(CC1VQ;0Gw}bg7aCYo!H7^VKrXe{a4{3lr?Kg zjkbNZu9*uGA%6UttONc1@+DPCyn6u5!As;3AR9%s0zEi5IL9ZahGT5Ot>CCb7b~x< za1lzMsPh2L=cDh10!9(J1k)U_K1e!L!!>0Kn=plFME)$JfH z$A-ya;irGp-5@LnTUX19wMK}Bnkq-n@0K}T?2(6TL#EF`BNd(bsi2_zt+}=JaLHS< zqx?9KvP3l<-4Pf4|_1eVfYL=HY^-eE*_qSN$W7z!XZsqI%vR02G1PWv-Cp6 zswFu#FwAvyev#KR&^m;!ddZNSZmd1KscGsrM<&65cQ?CtB#J7@|DMS(X^gWv&g+Rp zhhjki%wXJiyGcxZ?fUinZ8cp&XflSJw>w^AW09O|b9eXnj}7becyCJW|oTk?+*;#h|tI@|>*7(Bh)8>;SB| zEc0?ge~?%6v_V-fM$o4@S0VjaeAxkrcH0axoyf3*VeD(yk6YNmV=H!K!FyJ|;MK@Q z(MkRJ~X4-Nt$H5}Q{C-0jndv>!JR#UzaioKy_yoDK-CE4AOsUV3PAoHBz zdu!Rd;}b8bXiS`9_-dX=AogaCSJZ)hp2V9hF4S0M@On1va!Ae?v6v!^Z%V318T26y zeKOhyE{^_Vp@hyhrJMH>ZuF`D!u=3Mz;Hl@eZ7$w^%2E;5Iba(`o}7)J-aL);^iX; zC??nTZ`(WV4fzS(1?B!*0sGUc=sz^N7eciEAVy5Fxb z)EMTHZFWfLG%~g3ZlzfY6!&fixHQu=JB^g{^Sy+%%)SsGRKWmdL*d9dOkXCn%XEdD za*BlDQEr9bMe{2t_}1jzZ$_!0Z}rb=BV{C|qd!Q=P>V_{z6u<7;|C9&*VS8gp+TKx z3%=+Gs_cO3<~?PH#JWK2Zo{K?vWN>HT>p`$5JgIa6b0tsaC6qBB=*~QQGrl^cDLag znwmOuc){`&Uz8N5F7~(EOxKxCLenFoy8lnQ5uxqGQS8mq)`=BUHq`U}9h|Y|*?yYxBus2#VzNqYP-(=O$7%M)%W8lnoXl5A?cu;~MgI z>#|_}0aVHC)t8q?XALU!!6_pS3(Hz=-dAMFFQ~^SVM$+lAUl9V#?)D^Zz=9$>HlBH z)T>`+Y)k3GiNqk}T@g@~u_!kQIN}YERRu_Mou@B@l68RR4<8x!a0~P##w>0W=n2n~ zM8EIW4v#y67>egEpodn_?;bOT&F&sOQGBAk5TTqWN77<9-Q-zhdwj65wzVY;E%e=k zf0{F*J&=X@6+us$Afc!-wBp#$2fo#YGwBE7w%7Xa%N#?pI_v;-$#y!QeHAnn?_+DU zMX9YXo5<|usa~Fg2-6W(OlqIwKrI+q15$={O-d7nQtxfPfNg=ZUJWq<{8I| zeT&xLHc%T6BhXMI*G&}{6@<@)(-?+4;ljx@KG()DUX$~Sm*{#OExtZEMt_70^HJR5E`D3*U&x8_ z*7kgnc?AU450AwZ2~mBY1LcJZ&2M^(f{YCCIfNKWVG*<(!$td5Aj*w~dKrvdEm0H! 
zXi8>6=W~f~R?wGqd@TI|K9A5Di@$_ueH)?g!h5vQ~JWDGTw69Dd2mwD>`|4ZC`M41O z6%IiZJFrrzS=`;-4KmybMj$06g&^n&JXs6MKnb*N9bJ^mQy$9W>4d@%z zLR3z|`Z++HKA0B^)CE{-lL`%Ys)J@W>T96S5b_enVAU; zyc#(NUWC>cyVT76c!O3@3c_?!v%J<+a)e^9+h3UW$l5uY`w*b|a*xdJY{?O!IdD~1 zRzjJ@U6w`|S68PEMjQYb3U*|Y#afe#msj#EyewzYCp%I95|9d6I(14Cxbsk?&z1=r zp^-BDIGAj*Z&5t>K^EmLRNDH*Z?mqeGhP%hp(wDA%6CfAmIrX&@DoW1&#L81kU7)( z1Kz2t>z7xx_6rzkyU;n#a%vj+_dUFJJAod00T%{BWGT?pQ?UBeTX`erV}>T0Jhhrb zneS;R@=$#r@>w)tpiFTWD{&#%7v3Aj5UH<;$s>)WNhIwiNk?O0=2efgKuc#lD+< zWChFndlVceXYmiOdjgpN`J#Nzq7~F+bmTj!a79e<{hRQ{?p*YAO$qUaBfE z(!5tG9`f85=1yNSVxl%2rng-Qi!&^ z>ntxa(`*q_IQI5oxV}PkMeFgNab^E&ap2HI%z>+f6 zBrhDQAH|F3jbz>FVRR_0oL$3TW%A(&{g>B@jg z%Yp$m=Q5KU%9a157@kflXoYTvwB0UvQFM z@v;DDHB2Qz({8tDx1`>T8aek&1&|9|R7Jw>J_f01z>!rR5Mq@1uJ;S`owob$QE5}D z!NFkv&xb+je;jeQGS_SQPKU5S&ss3{eFt7LaD#AB)0e<`LcNUmb_Np*2w@+U_)D@F z!p#W@P#;-@L|N{3+f9B#VZ_-x)2nW>SWV&ese_k7gl^2#K5nLn!vD7i_y602 zoft?y6R0XBn^Vg|YFYf3K0A;5)Vp*nE($5IYwdQ3l+BQLA8?@IwwvT~0t7%H)>WDi z(@$?LxeA>GvEOb7z5`#SzBmPgh(M17HJ+HFsWaK_HaQ9#vi34l!{U0&tIg9rc4UDu zZ3dwB!*3TyUaqaHOTvywuZp@2H*^(wDArn9E`K=3E>T2I0g_oCJLsBq^|H0Kb-I^0 zWNuVaqFvYj^JlWRL|4e^=_yPE)MANG0Q&>BDCqeeLASlpvKMtK)a(P-P2y{oDyyrZ z|2r+eye0@~|MYY-vB`CkPyZ)*dv&6?{1Kc zi3J!(gz|UqH`0U(R>Ri;yR9)*f)W7KL6Ao9W34aAT_F9zLl`9Y->buGSt~vVfUjr* zWx_USb$wsUUVD{8Jsy6Y2u8Nk-vuU!sB1Z4lA7j|!mIwDhlHh&zw0vu6Ukf_zQrkhd`cG@RF;kng#sM?&>JT&!?$NuCE4;xJgB@P77 z`7nphxq)%|mwnw6MMqvTF8mK!4EYkO+CQqJtX$H8@qmZ{?9=M{6XSf~$0kzmuI{@% zj2tNWSf7VHQc+H$%i(_>r<*{cZk6PvYV%br3=IHr!}@I|YVrCmja@-T{Z3-K%F6y> z&=;N^pC-G7lN#LT$5hF~<1@Bse!pN2-U@;Lg5`~|pv~kv3qm7LDRCv~Rm7htX43+J z!&e05GZLV?dT&QUpywu{ioqpAsA^Z$eQADX?nU(~ht~cdg1?Q0qCw$Vf`^w`8|w+c+?8WWJ&9hMxGV)uA#clp z5#F^AxVK0D1lJx#{ai}8&LmGZc01*lTt=#p!=ve^q-&(LLsJ?+VzIECg<~P_@90n&*Ne%F10IwjYu(^zVZTYGMmC8bZP zPy3wgeT%Nw3~zfZtA|jnF0Yg46grJd9{fpzoxCC{zG_NMl17DbryIpjJH9E z5(LU*M)T*Og5C0`y`)`)sLJ!dMnyJyxp_RNTPmA3>Ft>H#K6YU$8Jq)H8szD4v=xG zT1&U=eMz0i=hwUeuTJT<<>3AO}KW70> z2Q0+uH4fK5E(^>O44pSh&B5C~3FAF}yw~~W)(g)oYspQil_iz=nIRSJh--Tbr5QpG zhlT{VPhj0l7}l;uwLi6Wa1i4qGZr?w`bd(y-RE{fz4FZ*4Bkj-4n<#~G{CGLHi*PT z8F^?`qcgL#+i@*vgpuZ$C1)`#Dy^|4w;6!T$;>exKNFDeb1#9pyBlibjWhf~rm4TmHjSe?r1hn8TEAAZV6TJk5OgSDAe8 zK38`@isW`(yPI;VnVGVv#`tDZx(E-B zb)RV)eTm*;6Lo*PpWrN>#Q=nOnBeh{40$kg40NMi({o@L$?&(QovocQ^e^FclQO!)sHRD)Ly=6jMkx~)Gww4 zH%)!xVCRH9zaeNKDl!Y4a>L5>|jI-r%LALRZyrU97-r+ zjyx9m%m`Di`q412lOOS;3(tI?HqtW7lVv6yg!>?4&V3Vvm)heH5&m9UFl=Wd+4Fe&P-F$rF_&|v^ z>ei}mn;qFkFu9|xEqlcn=k10dO_HGCV3?=sgckmU8e$ggz{jzHPhAuMvMXTXmtta@ zoCf5uj#(p~!1yNv&(~c6E3uow_uevuL1G*tv~>*nE_B7<>Y-PKGLyRT+S)rI$(CAN zo3M%~EX9R_$9Llw-&saAfa?4H(^(?J-51|@S94F*Jg`zukBw+d*2Bp z;DJMuG=t3+)05KG@X$(S;^-s4Ag+z}IuDrKyM+g|28&j%>3NRHuKAQ`pY=+?yY1IR4pr>=KHqdoiBKp zys8S$n-kDYRE`enLgq{|JjF|Wq--1K#tF8sT)AzdN1ctMqg@gp2Sx7#h)(uQ2Ck=j zdnpF9f`nh*&zS?SMb3NGKZ-9#aA(m7Q8C-=3x-f@BTY+Zt#2#)W5wKV=ao5mk3RL9 zg{nKdgn1}+m&eMB#5FPkOeABT_1R2n_r&KHJWYF#`bd&DgCs6tvEgYN4C4!&tA)rO zLs_ZJ#7tS>SxVL=>bB=|t~$7=8u2mv-u*r7pPwe%rdJrIlo)Fp{XSLd{IOF36+?*+ z@R7>;Q$Ho($qW!kXE_oppHW9MR(;V^ws$a%pR0gWaS%bQ$D z--vTZPo&QA?z>suNPAiwR<j;Z4CCQ_8*Lj_+} zRhH;TMqeObr;nz~wcFavlH;$BT|IEuQj878Zi}k_n8lF0oD{3!M;VeO>B=deire0v zlToIOL=Of=eJrMC%;;vB{Qq6^^AkG{mwrMOQnmZAK5w{SLZc1&?%`qG*2FV zli&KdGr+=&C#ff$J#w&jXlryUz=B(}b~Q7rNAs$Bp9u^#vA@-`d&H4Ub{r=+x1SSV ze`BOSS4SOLwPs&8nyt;XLA(JeFhxGrwx%3jPp=L3}#>mx_TDtqG;08c8W|blrIiEas3mxKd zUj+t3Arskw32UAQN)VqtEnW$yKtmm7(yF$%iwi%0Qp&SZ8|~y=$naYlj|J?}M3ww} zi)8l0iy3RIoJ?F;d6%-`(;e&m26B0Yf^$GDOaPDCMH$FGxa zy=u&~QTTwjQIIbG?hezU9r?_Bwg*RO$gp7s?g72$iWu%fS#eqD<7QL~)&m47Pg=u` zL^9<#`tFtFbdY 
zL^A&h^1HjvC{PpJzIh#$a+#Uw_=gnTobZoJh8Ub`S8YC;EZvX%->-B@qk9QOU(oL) zTErA(R9DxXO*-_>1%%@THqT%_>@FBB;3n>7ZaB5cy>)x2A6V%rm(_f(dwJQ}cl@7{ zx$pHtcbRIPE&y|01wsau;EYSB8SSDkkuk`Z$c;_R^a<*K7LksA{%U=H-4{X6t{iQEIq*k& zlty#eq6<35rQQbzmvcQYDX}~M$Zhi|Y-K`_DLe0`LvcH%!>~xpo{I#_kCj`zRoI83 zf@P|d%pki1ch`ISY3nT&zmr}Whbwkj>asV;l2dDYdAJ2YS;Xi$1PH)zrFoRwGNl08 zAi&i%X%wF|UnJI@gua@4z9}j=@KgV}Z>dhe{qG}U9{7w0=@+(w$>H*oJ=Bb!9h$EIE9`0D#3 zUBNQyW%J8kj#pzTY&eASyx8Kkw-eW#_|i<;qhFo>@wjau*=@BDf9OtQv^cX{zbI}z z#aDi=x_BFf{UK5C3R2%prS?vQRJxdWjP~ygo6rUus}&fdD!XCoJg>K5W3kZna;zfu zVEys>IP300QF)0V`)m8o1inaa4}DRTo6!&aSX*S-aOv(7ylW#C6lNY;{K)?3E&agT z9o@YuEiBzVgv0CqO2ptJU&0a%{u-i;s+@iN-ifa91wGIi4O&lv>K3j4FPbd$=UFHFoW!sw9(6(CO~5 z*~y|Ut00$Hi{-3_b#6t*_UVv&&w47oNyTVJgrv*o__)Ak{XhE^e>zwX7p$LC**VsYkJNF0upMN(j5nf!c6eUXp?vVcfZL`ryUhMz0Ie_AQqIZt z!fUi?WyofR=X{>45jRJwjro|}eCRY3Y-Lr`KmAF8Q|dob=<7<>f;u7nz}Q%|acDid zbbO<=i4#|p)C-Ijy2GK~RL?Cm&m{A?#YE3~jZv0474f5KuR{JWp6lLcRGjSA`EkF~ z4x$cHG%cjH=;312dea2fTwhOL@HiL$Q4mSKaEFN$%2iK0!kfl;^H-<1bH*kGrAIzZA z-4d39_p$tnF+%r9Na8=fXFR*7$n}?gS^rm~(u*3L%?qOMQ~m<;)&@9n1A3_6`3Xv= zD|~GHOP-tE__wce3$ct0wZa*(TJZc*ovyRlW3;4S_-kkTn_qF)USe-7)XO@IO2FmU zd`~A!5w`hct+VYb?Qsy9!HC10r2AivVNDx=ur2$guIqZ437ZDWP47X#I-4-qa#*|t zv_Qi;uK5^5NV$bIH79k4Q%_n%&br#wstRL-=+|`ld|%qJ@jSlMgl!T!p>HO3zBgR0 z!Vq+Be9Zwx=}kmY#Yd53I;97fOI2$PqkajI?k!xCA`Vq#8=DKW(BQ|oxFJ%pzw@rjn$^g7$~nLlBSZs&1rQ++q~ukpEM zKM)iue3CCzDFUuiYxtvyXGkwmC@!bX6J2Jn4Z2}6;q0AuymxrXD%J=t?HFo2bItyi`k zMk#A8UDQ-r2=V$Lc*c3u@0r=MrWmJ*<$-_ghH{EqWvs(Rvgu*PW0kGeqL8jXDQIf6 z1|4RSj7+vDJYbIv3{ifl&S5*gpGZaArF$c-~BAR00+tb-Ad8q=y5 zeb1OlcK z^qK^I+tcrzaV<-A=C|}D%f&$^n;n&8K9r}frk2#gd6!c)_k|*JeV$f=iTlOmK(1vc zs8rX-`#lbn?mC_bl0djM2uW1?pztF<#D0@lIaTD`$!W*;+5t22oQhvHYh70SfH_3% zlFV)fsb{MT<+V0)_;vF=1t$y)Qnas{lExVD3zi2@=s;nZLvs{Q+Tif2EF_fK#YF9& zvl^ep$Ea=LpE5I9M5Lq%@0<1S7A4QTSdah7vvA-<+Su%Fzw>;zpg`Kj2JUZn(Q??6 zSt|SY4bR5clf{xUO#J7m*Dmj1ikx*PL{yS&_eiNdt4cbW|FEMaVY&nkAH4K|fk0;2 zhCl8n_#(c>iTut+aK-TG;9|q3&h-jNFHeE)v$-nU(vqebxPp~A<+UbJT6Q>sh7PA+ zO%1Z|&vpc66Yet!&XZOiUO2#K0H08Xj+=9nvF1`aLyi57x|rTHGMcU=YDy#71oh|X z9Bv2U*10baeiK6Q57Mkq3n4DpTI*-_nEp9-Y%tqFU0_ek*8R)DSC3OO*(Z$@nMZJB-3?qw+Ev9+QQYx%K1v=Z) zxAUzpFtFhqP|6lMA*kSqtyG!cK~Ammi>o0vpVb&V74D!%)BN~mWp1tNmgh?(nifif!NZ5u%@x>-T!et$lo!DOsR4_X~r8(SrcIasMg$It3I;E8m zZ}{VIKtFzCW1~M)ws1&k9ClX3eM(QhTAK7yN4+>l`(AgQzJY2cNA7Z(bnJM3>HO6- zU39~g@QqHUbb318lhY%4*n18xkFzu`89-^8^OWI&*wa6Ap>u(xzXSk44>HAdg#-t0 z^YvI0quwCpmd7zv-&kAwE+@3V>%^Br6A~1pmzNZK-REb2|6L4UNbXTFi13UI-F}yI z5o;1-*<*;m`Sw#u?-2o($EGVq*KdLvg?ZK9u`vygvkt#9fbZYYq@MU>VUc*v7mx-*`>{vF4+oAdln5YM72Z4`PSFAjVtz#xm*;w?tEW-!r>- zoxHUm9Jm%TxAoFCi?lUe_@*$^O*5bI_3lOU!q#hgqL1ibmZ>x75iB&gZ@RA3Jp?GQ zWMZdJF@*jsY4X|=b8}dq_8lj=Bl(&T7S^&^F`Is_N$#{XNo(aENAALv!$GLK+f@*) zs*~d1N|*S;--wc4hY5uUZ}P0d^FD)WKZM5ms=fn zhFw@TzAh8f5bfQt=0R)(LnJ^#?1;L1axJlsDwW~9*J|h=82B(Iq#kV=SI`zgc`3a? 
zNui+&NEsK7o(ECAz29s_t|l}L8LAJhhk=)FZx%T^*wcqmN>;#>6d*ow5K_IrXTuqOP)SmM@v3wd}rv^hdZ2uehOg{yx zNq)K9dFAw6T3WlogFmuOX=>3-FN+sPTnPpqYLt|KYZRoLX-E2u~y4rCi&PHOfNx4w$l`qUt3>K zC*cz{-@@>LwFi?hNYY?9#W=zwqG>t*B<-n(Cp3?fG=R}U4J|QN|F>m^=9_Yb9eR)c zoSo0!08e|EldgTYuzRq~g@I>;-lPA>@gXd<-lNv`>O4t@NR%(-D zQ&EVz+*i)IfF~p+*pJE-hIofV!1(^RC4*`2FN)H zFsYjIIx{~CJn$B}yrELrwoq=N{Umk5(E5G6A8BU2r@`Xz*NX&RM;!tXt$7~J{}B)W zMpQ?9)UUgGPCFH==`1?=vS%|~P{X0@o`%aq+ta5V^vr}6JVT^^WETGTXAF%CVrNFY z-*4a%H(zh1*Wxnn;;i{HB*WO=T(FcPf6o;Tqbi!G^e@ILGX2e!S1ke<^9yY^V&L1b z1Yu!b#*;0wRO#63hYQ_jHV~hN>uf)4>@s~0Rq;3g&|cM11Tf+XU4d@N6!)*G!dj!QUd7MC70tiuryGN&Azj__~f;s5p1TJg)E@g$V3ysML z2w>jnm=+Q4Y~|$Z%znxB;r!@JGKUy>%hUwM3<;mDVXs6?=a#Pvqvg;6nV8pyM`Y)_GWk-HQf z7e(Zr?X%$o`_@#S#NZ9jki?PSzHtEKg;>j18SzF z$A)osh^926$C#;e*vGsc3?HO8>EU!mBoGcqzZ$DroYC_H-fNXU9=-^E=Zk!ilkTFcW@kUt}C ze~b;(5y_`CD7(LYku=a}KIsi4Zn=qxDVH9zf#H;psR*lbUkkITQj3X>Z6M*!BUtA02~Ae`;ZEuGxjIj>ED`&vD$ixdHu^aj*zs6PE)bCGxL_ir466s3g`X_i1bMbbb{#2kM)NhZ z-umPa0$pRec>o#0m-+cOdsCx|WTlDxWhvY-L!=`O8}X^ILbO;%GjvbS1BTYTt6@z! zP0EIJCmjEZZFA@{I9V5OGQTxZulu|xvtD0di^SfZrqAIZ6OhL6^K#7|fs2uP{tt;+ zwTWZh=XRz;(IR|{OtN`L)JWLvpx>4LiqAjrI8Re0-C=Dcxr6R8rtS7@^Q`M3M432_ zR|<*644UWEHKP_PI8-oJ7+O~HBq^~|Xu$gfK1J$k37&Wr9Nz<_X`RoEthgWeNDPN@ zL|>?@L{LVyrSZpx^qQpMOy*!pam1zD#UBSW>%*&|G2LLfXC(OUIT}2BMja_{TZ$8gqcrxNF}E z|2?i3>g7)QeXiwwp)2r))@!3BnNHQs$;ru=xcV@K9NjKe*S&e(1_cRG_;$1=T6i&% zS;jXQROIfQuvoT?CUV5M?#dm~UnlX9(V3}YyldBui`aoD{)09F0Ratb5OGTsLs>1S zQdSzj(~;W<{0?EaowkR~^p9Zrl%tc{MN@SYmS6w!2TEFOe>pq(Bo{u?n2z~<7kkm7 zaEYf8l&^?+;=5-p2zBLl>^^9xj%{0Ub?%L{Z)R{w9AmEQI>%+U5|Mop=N(bGCXR-S~uIxg3R#rCM&jp6YPR+B zC;c0it2*ED`Gq8blHiCPkkKTz9gbV8{(QTmc>2!|PHg|Tl#&o~EI4?Hv9ve5Rj~R= z8+K$;fBR8Na>Ox4wq=Q0(5Xkm;-HkwO(3AzAgp#!kB;e>_}a8-t6d4%*~jM@a6&S} z427}TNA@!LuCMP)lMnUw-j~GgC~iiStm5C-Suyi%=XX<@5vETdh)w{fU9IxxF?Uxs zH(`($w*Q|Su$wv3lbJ{zdWL5lYQXc*)6-oBSRp?1$qt{9wnqvNg^Npi^_{os_$(Cq%f#1isXuUZDo3+vF5wEgu!s7HcKt=r)4@OSBgKA}2m@vZh&3wQH!Xb^Vjo_(tWk zWzt%s4GLT7o7cI+tLe?r$0sMI+YjyU&R6U>(lH6s!&ERuWpxDIm63j~VtX-69bUH*;~R-nlW$6*44}YD!?A zOcS6lD;r9VzGt08)vnUP?-qb)cE`vN*{gbclGHpX2SQJkF3Dq$*4q~S2ej=zpBUgyNP^h9H(VYWVo69DB)m5@Ars9 zTc-FZ4_IJ|#<_|;cm1E*l~gY`rgl;W3na>Na!;KxQ;JE>2$3hrWM_&R8Y@k!*cc6* z@RgNs!^B$B(kLwPPZA5xQ>*)Cfp*$V_+3d(g7~l!AB3ah7A&(U+x-R$Cn?;**%g-}MzN0IlH?l##l!__?|J*~pA|Q8Jl<^!BCwBG48a z;eXryRZdjMhcXx%s3U^Iq^{q8@=&MjhKPakslCl_%(ggle?FdLId$O z01xgQyJ&1?2*q~0+Hk&?#g!_MysTdY!$*!qGoxUoqHiPbw<4yTYzV3QYHI7RYWvkk zIe(Q0xiGdP7M725Lguu;IEps#$WP*>I%xFi@~XQt#w z4K!?`v<*xCNKC1A^opts20Yhy9G7Ax9tlZPz4 zE~yi%+-JUArI-i3>$PfIMg;En`FleAUnUHBFzC3;MlF6!NavxUi1^O~)XI&J8;@|RKr>>Wu%ynP}v zQ=GAAW8`emGW*p9{JvXjeVT*x5XuXC5RFyMAYd2-AD2v!u7G!puRANc%;Pi3H1+|rLy6AU8_BEvg^JkKi@gSYEYzc_UG`V% z*cq+}Jg`QM?OHhAq;{fNSXf}vsdch6#umNv%<5Yc8xPMhOnjLfzs^s3EBcp@-@^W| zZbOCQ6{iQIjC{&C5WR@`kB1WYYc76&P3-RMOnUZMc)m&Y(tvi0*d(p09IdLB9L%eln?2ZaR3CKq^vFs}_s+o`92^|H+aPzU zOmLdLK6#V@z2=LH%hq7d0L2~apGbVzf~PaL>@)e@kL?)nQd^ko_WA%TL8T_}o40r_ z7rNiOcklfCmbYQ_-tBY*E6Ydp53Ziv*E6B=ylOR!Qt5UB7PInB0~Y?H-;vEXWiC_T z&kn!13Y`1^)IO&+MT3EhC8Gn<+fHsaHhr%v9!5&4M-#rY<{};=S^DGuR74&pvTdNZ zSAGXaE+$}ZJ-4vF&!{6n(Xg07x5@^vHV}r^pHpZUI?NoyC~3nb3`*3;zRg%=@zMlf z2a7=fV(5sVe0(|Lz&T-540SgDjjx0%2i*h^;TE0;SJQ_{n zHq0huB*cbk*}LPggJ(&=;?CzA`8woEu-mKKHV@I>2q*U7W1^ZmOn7@Eu4CDq3Xuc1 z*Ry+ec19(Oh`^3m!qOOE5fNEES@`Moh2Xc~i|L=S1@TMrPF#7GXJz~O4MPcZXLAjb z7r!l+w#P13eL<5&_zo!t3bfWTLonvgA*}q&07CJD26-RVZDw~C;zRT#YXwe@W z1iEV6@yW?`JDz-%)wQ)e2XmfyJkjtuuFU??$;o^gsqFxh2ug5dgqR8p4D^vcrAQ7Q z1P1lpi=}lKQoD?>ag$MrgP%XXXAzEj!)B#$iSKjU;7;TwAjr?pD5(JC%myFS3Fd9) zcaH#PUiX2xTpIrTq<>sgKCK@yPw5fC-U+Det{u5~7#KXo&jRP>g98j7tL46QH-SxH 
zS~@>*5JB80By3yUy`neznX%E++n4^p|IBUUBqAZ0p?9@cx*op9B${}b)}G{&w|tB$ zcFhb0o*jyUAwqK)W3DqCS;Q^!>;xUfAdnoRm%<0$EVAwUC1U_iy<>aV>Yg!9C+m~5 zHzm_ZQG^2wS7H>tWoL;dCmHdf%LuZ5n$KWiGH5oqVf7ysQA6ZZ%&0mk|ul+dW#cd`W|YEh-;(E7egx z7^`PjCQ~u)WfDIc8rt$YJPbRrAl%DGV^=eYV@lkze|Ttq>ozTTKJh4BtGqO;|bc;)|I;t72bPMzRTFy8lx6N z1}xxCyw4ukOwqApa~%~BvR`^8FpKgy>)TyMtkOPk!?nwu-Q8wQ{)Xz>0?+*nN41zT z>!W2d%g{LEx)?er$Y-X|eS~NW=BF6ZeK~oa2g(x|3w{ zVYDyZ#@;(8&>*IRDqz;8NQ#@4UFAXedtkKSSdl@d6LRNr*~<^5coV{i74-0LsL_5g zS?rRei7%DwG)U9#*H)}Wou16f!I9yf+_{Xl|jRj>1O>9J~#P+;K8r&ra zL9az4Jz+1u`Q|0>bD}Ds5_m0Yp_wrfa|jqRS^*@40dIcSgSBseWe^BT2&2VgKk!Uq z2>WSeMCH=4q&uBdKa7l2*&6$vJb6=N&W4AUmO>LH9@A?Ay9W!+gW_hckSaz-6Yi4D z%LvL|1IMUeT-~etSeTf4Uw(PW(L{$1&I+ZE3e$5lQvG1ku2yok_}hAjfG|MT`CV;S z1B~T6pBi(-;fnTdgtZy8_e5nHmT9O*GulRC3E^8BE*zyeApc4MiC9%bKwjSf$e1ZE>0sf(3qG2ypZ&sY(>M7es7 z2nfNLWrl->9v(WX&a9lAnF8JOHXrD|s4-j`xJctPgQZEysDU~ZIL0b8Z_I92W|M=* z47NXR)B;orpHc|3^J6wqVqbSDl5rt!r<0L3RlSq?3w&tc^xX>>G-HqJg4n<49Bl}B z0#E!9lTta%um+pfrxY(L#&JdOi1r$=D3i*p4j0Bynu7OPNMe`K-1Z-UCyxWZKI;7W zGpyjALiUGaE}~!-%#w639k?}_%>KSnnmI8JuE*s*_QwKkVAC>kYAAUn>d=E2i^zE-@hY^?!f4=LHs(M!mHtBI)$n@ zq7Bo9?lrky6rUE4p!$K1tfDlqw0nf)jiCI51@kUC@=~)H86ASxJI>LQc(F|%@mG)8 z`$tHu#@9Y2b|ju_mX&76GY09H>@j6wHkJ{QYtef%AB%$~N*R5$cqQUFHF)3@X+{18 z5)y)xs=B|q3Q|)asOWLT<*Vct3|O2TtQV=RHTvH8FaoF1hr^t($=5K95MmR|`t7?9 z9Hr7VOcaJ>C(UjU&JezHp7np2;I<=akDstus8uEM>3Uq(P+wn)dj&WfItzqL$Z5pP zRpNG?H5adiknk43#O)Sibv%DfIsU^5TSJ~XfGoxY3x06wUmj=m1Q@<>c7bOR&Hd!u zW_a8%z#hsoHjV2Jej>^3CvkI?@EqZwYgy#NT1JB^-`;Y+;Bjwg^s4_Y43LB^?ydyB zuBd5fXej);Y=;-ah=ql9u)qImygc|E9R@DTP34LJ-8feX^FqPzDm;|%DI$WNY5uD0 z1<53WvPhMUa+M8kDk3}L3`Q9esL3v4Gui>tz6i)>?C|GlXq?kYP*xO6Mz#1HBlDAb zT?y%4vV3aCckLzOqv9-L$i2}pt3}jNY&c7(Ckm;P?%F()8dF5s5oUWG?k208*9-5H z-}G{!z0=c$ygHvGR+;4zdE#ZXmlVpKM6YxFw==wWK(G{LW@)KvT%jl}E#1}A!|5{o z^~vzW1dMhs;9Uhl<-hb0EUrWr1~rLY3|yLay@6<(9$*Kt{gz6(0Bl-pDCb*lF`5!@9*%kQ=4JVK0YJ)F zb6e}rpWtkSfUt$0Pj-5HQa3(!DBryF83)-ZYkz{Jvhb6nx9px@?oOuoTICy|t1`Ov zs^+7frWDHjlXf5=yc`Rzsgq*_!AuYqLo8O=zXvg7!%+kg9&!52)t<69o+VI^AVk|!gv@F30O86l=9_jnvc5694 z+wsB_u#gZedo;<20WKV5@4iyLp4?4vMux~mI=+4Y|E7Hb2fPF&O&WKIEf=xbce?+< zOeMV+mmrfF%hZGzoDc~x#VGE)d*PG-*-mvLiisoCZUEn{U_q@jUbk28(Id<`hngqA zmrntYet5E6)U!z6{RGloA#$PQ4Tl=YI-xG`pb89JFUfNPEWhQoWI2gRM4|wifGdum zPZahLFa)pA-lr@bp3o`*9akZA@7dl*QY&Ike*XM=)!WEr0)4Rc9y&rbcXgHih&IVz z?8rxm{L925zoqPe+Mm5{JA#j_6Y`Drc z)5W2EB)f(tAOe69FRyxZaI-$Z%Q-HrBYz%h7$Bj4SrRo1VEJh?4TCourDTE+Ajxc1GE7v!_-+E1X2pgLfIVVSq1tieHZ`~XK1HqArBhXC z`R#1}6(ohS8(2^M9EAhJzU8`KlRzRf4u*fq2*@+gk>!s2KLVg{u|9H3c=53w$r#Zd z)fdR^&rodRVj{p)z=-~NkJj-S@6x4zabNhEhhX~mQcB5{^>rv6@$;hO95`ZvDQ81N zq66%%WtjKIPlX^f=(16@_=ro6N`U68iIDWAb`a!n&G`IZ-5aY7*+T=`KybqkX0@N5 zi`8X4rMTa*13{eT{tJ#c+Gj;{eJEpqOdaU-6(Q_JZ(0#&(zHVvDm7GHd(JrlTZ5qk z2Gjs?L-*hTq*a=CGji=}WjQUUesjt2^QM1?0u_BybwzYEUXI9{ceoh7G|A1@#JD!5 zB`6>cfFCz%P!YYV53P*SQi%<6)0K(5Uv!52UfNseNpE+!1pE6jAs7(F*8@X*6{tI} zpiZ93p`7qNs-WdH*hr$0PlF9yw00`~b6Q3~PzGAbrK#A0DMpc*F0u`FADU7}RF@Hy zogEVR|AJ0JbO4=$M)m@zTfEhGWHbAV83Ze{e;XMa!y$S6!Ekiv=+UGf3K6j=byuHc znQjUVBwc6&^giTJ&sa$HYOlU&275;^?k2Q(S)maaqtE z(cZb~oX(4nkNVmOT@smfYI7c~**H`G1?o!`lJSw9Ik&6_X;G`aUaFT@63I;gc@!cG z`#5m?mDGWW3!M>3ZxE<9_UJ0D+1cj5>APP2owe1Prrm_5sBT}w%q)LB2q=tgf}1D5 zeJG=oXyqfbuHDJ8JDo}8iN<F_GisgJSgE$fm;Qcch7;nNk=TBy@NXXFrSLi z^aV_y3`DM&gv7-Bk9v^v$4if{IcV^Q47n*(5Qny0;~v(srt@K=!J#3$91cy9N0$UT z(waX*z-g=@qj5`?grynpMZO_V>ej99w%&bQ8Qn2A)4KFu{xA=SjZ9ii#pf4}ig| z+t_(c=}!W5T-P{5&<3sogp1+Ud*#DDI1_t;NILkn7%~XMF11+3!jVX-Tu^*#pVB6F zns%6hPZ*>2KBWyBz|!*3kcUz~W{VH8v=qy+ug(DwcKLhIot)903>#8=3;wkrXzK&+ zaZznl5`DhkVp*~J`}&}**g*GX#>k4Gl?>rM%?e9xAWw{bH|F4DXMYmD)T%P78{yZe 
zx8@^Voktee(R@+o^1vN|_7#vgQ{9qRb&aHwze zg%TZ%hVKOVP%h5%+Er;yzdF2x8V=P}#_WTif^fWTdi}gXtc3IQ{s8X9_im#5Ob3g5 zaIWoPDiA3e4qO{5W*xb#CO?#4*pKIrazTr8PS=~k&35d)Be1%8GBMcIV}XlgzsC)J z#*lL9XWkm}apIS`CQTg``1EpHpv#OMA5=jcjg8rj0I#3?Wbh0kjpHDP^kQ=^O|7@k z!~uLBf%lOO`+)lg@L{+4BKgQ>wk34Qjt^o)lBeR&tvl-(N7f>ar>~#l7~MVehcr^iCgq$cx8^K z5T2iemVpn0^r1498UAmj4v@j2MBcpvc_{8{pkINxrgc&R|9v66r6X73j9$#W^!#UjJ&^hY}sCZvZrymy1zE!gRX8qbV++>vw-4X$zrI zbFP$yk`V?}O*r;ay37FKhP3^Tk`JX)N-MO90!B=z3Xev#vEE&^A1o- zgrHXR3>5fIZu|`2A$Ak0S03n<|O)#$mnXbeAC_hI|3BoS-&@@3Yq6~v7$ zYc0Xl;rd$`e#9P?!~_V3!4BpdyeSJC#CNZtA{^8T^+Ohp*n|r)t#pMmasD-<9-tif z>sR#OwF=GtQyu=jcH;jr+W&i^rxNTt1E?;jZx>PNv>bNR<%{@I_EHN*UJ@lRZG>^J z{x4`8ApM8`{`cxMYa-TtWkjCPSO)ATf?b9sK#_V$XxJJ?7CJ7q*&|nLVv>Ia46C8> z=@=#p_2##=i5k`6*Tx)qf*xyS!y1~JICw8U{JhDhohESJ)b#rJ5Yig}4T1mD@GqkM zIW1(NZvPg+YnxqF#areVl+nYrhnFxZ7n$Y6&x9@lwz1Svqx6voXE!%3nx4)_+=7DS zu$to@3knuaJddQ>TLY$GPiLmnaLL14*Z1losRp}wI76gwipmcJrzR$Lyksk8$S-=6 z!Sknx34S(Lg$XGrhSz29-V7BBesV8a2Gb`^Ao@6iYk6jir%oI9^~@rpr^AGi@Gwix zl}1rRL&Jd%Vu+W>$%o!Q;3t>;Q%(e89yqC1RHz;p(cz;1H+5lh!j##qEd_5Yb zUa-I$qWTJ3crIp$*5p=HRPgLWQeIYb@%C$wL-haozSk~a+N&E4|IY~(`U_hKmwzCa z3u}jMi1daVK!>9P6BCRb#ryk~7nt-{%D4_d9rkjmB=Cd%CZR4XS-4X=KJsTn z1KVnuY#?OZrjO-BhvBm}QdiL=ji*U_egR;Ja?;T(+g*~79rS{o63jJAN}_xiFCdKT z+S#$Zk3_BDB!JjbqX6C_R8UFo@b<#N2?XKWNBmHp%~DG9;)25u7*~cjfk|^<>Kc*K zNA0iY^94doXo&%U91u_jGq&Zl8W>oC(>)QC$&UKMvp$!s}6A=lg{a4sP5nB5y9inUs6Bj>N zbhsR1UQ^5tCBRkSu3|O5LS0^omihrT+>6WzhAyv`HL)}LHBEgWbq8BV@v`EcKcv9W zFoTqW0^%o?{IbXrSs57{Qjnf6uEBeNsWo|P-kCZ>Y-E&YY%MF8s6)RT8Cs?_Y;{q2 zD8WrwW?uy5Bj#Lh#d`$doI$9o$I|j#7tJJ&D8Ib-iAn;}0?P_j8wZKVKN#{uGQdgi z60ZzGd=ixXk%XA|BDf7IQj5$2#2X_9C_(N5xafy&g1{wV3F%(Aef2IbdXFG~w=Lr{oi}C5@ z4}vq4wr42TgebISW;u1PtB?jrJlchNa;?&+4>IND?^uYx1MWe4&}9YrXk!i+sA>jf z%w@b)_&D0fT0A~dGV%cue`sAub1khCTGIYOJ70VEFAco^K%+@5p`fV=3uY{6#oo>i zX)xE#gnqeuXH<#Qi^&}ss%()1j@3YdR6`0OorST{o>1aOV8jM1w`btBnzoflMN*-E zwZZ;}E9NlfFy`CZvFHEdp40Z+J9+F=9w0c ztR5a#?z_<9ee(G2a$I2F%b4PCVfOv@V z!s{q;<)(xmaOxDr-MR&JxC;E}^7;2gMOLCEgH>>b_b&lVg6=JZa{anm*uMZVLlpNl zwCGr?zvFr*UkM`@Q)#c2)rax}M;~kuJf!c#18IUh8Yr&(gSk0@9_rHa2LOcx30%H` z%g?L>T?0l}IOC<2ww^RYV6C2*^l`)-Mk4zFOozAm8#0zwdBRo}7eS2VL-_`V_QCr! 
zCg>Rr;pKA~84Og&$rDA=ekkguUmM+J{J){8>By;%~ll&vuqg`K*z2!j&6kW!3Ua-ZJ&$Ik0vBlKY;;^*mur46<=sErsC2Lny zyFY3MX0E%3$T%H7yTu-;xsw~FcBWF)?XB*B2(&=pBZUF#F93xvjywl34r2G;d&5gx za8Agi@E>i~ELZ5#qck%5xAf3{%0Z??7JSmuucK0_`Xiqm2F9R|lyE6<)*2RTvyXtjua}G#bd{hM2QqIHdlx95Ou3)ezxoNXyL{QX z@Bm;G1Jiezz9Pjv#o?XH7J?6&pU6Yebl{J2${fUlAm9}8rm8#kUnjI(5UhofF69J) zox|od4sPprCk(d6}yVfGG2MY%A!+3XXnv zotmpuIj`)C(n_v(NJ%N1)l?_cILsr|6W2|PHPlYf=8myls^2BGcNAOa&+gyQG6O{= zb>ZL(of^B>+S;Q9D9zhJk=!wS>jk`ha8I@!{ zIq3Jv$8~2c+#lpCc2P;cCOsWNW@WLi0)jkn4j2Y9wuFr3;p+V$P5XQhCjvQkcUiZ$ zKc0frpu+MKDes4Mp#X~+4dCA@rkPg!4RBF7&@TZ0sDiq-p)P1#Fko)XYSxbv?ZhJ3 z$t<9!KC76RDLUp^3&YqV8OFhv&&5&_MIv{IG&FaDig^-$ zAHNDJepfm8a!N=l>=Q{G`S{y$gnIdGU~^Le(wKqDI_~nVpWY@j8e`ol8D1O|KN@I| z_-C^#ZiF$TpbCBepmouk>w0ffS>Z`rFANmu24T{b;`n2bM}X!QX?`JEORQ8`!_fsP zLOBREF&aR@17-+DvL0y`qTpQsip1{ZlaHI-%vGLF1eyOPN*dIWCiF(!n6h;S^!W@} z5(l%DS}fBCl|<=nMuo|*;Fz!-e&c%z zUq6uD32!?Ym$K7$rBV#WGJ>l^dX?#*YKSJ;#g)SsQMz;#m6>z%KOO)I3XS2@)PXvr zrio+W4Wt1}5!|)7_qJ}PwwaFj{7$1WZ8`YQ9VH!R{r#Akm^#l*Ok12jXCk%ZLzdg@ zI^j;cfoc`zLpat|ev#c28NA50b`TW77j_fbdE8Ot3l%l`oxEG@Br+j+TyWg1Ur@GBBUpY6H z6CNE*pB^=;#bS}yQy0ciqcWzVb&x*p9RRQS7wvNY_AmW!vC46|mo1`fMosVt;N;iJ z>lguu%&(J6ja*k+CPw#-bd`tino~w@8RtQBbMvn|)CO1aniGejMOw>NbK&S9kwD&ogZ|Mp;t9ouR*@_clWi~8B!2!PL7)Gol$UhCTE}J zOqg*pv#ck(V)JD-{IQO65yuEO1GOcQT5M!7+!fP9M$t&mL3BV`;Of9H_4RFq89k6^ zC?L8)-pO1zYVt?Nd$vD2U3P7s`*`#7hP1Twoz^_M7|GDEu)>^@p*mZRqi5G*hp1c^ zXdYNEoZoM+J1?wm+zp=6b^ZD1y}PUQ$veb}|W!LvyH4*2Ww4`7)`p2e-*!P9q0K=iYH8i_uoN z&FA|~h4d=U_Pa$5Dp35!iUCdpq`(fq8#JvmfCe@QXHo5Mmg6s8s6XJ1sj&&o>-^D~ zI8e|&Xoi+Qx3Vl2i{Q5Wrd?&IR--~-1-o<={c!Xz4l-ea!k(Wgo*q*}jGDPPqZZ}4 zFC)oQ`Rz@SWCs6_^GF%d0Zs)wZeMBx$O{8*nSeBQ?whV;QGIW1en+WQ_&()|hmDfU zUg$`a(IR@YI)<@YQSGVC7Sd>8UjNgNpz17V)ws3P?#^20)r2-N*)sL9Kf9Xb^Azjz zP0P0L>(!27G9bt26S+MMQh2GW)PKi~;|^f*(dASk4q9s>LHe%r$V*xW{EVv-Wf*E%})b1_S(HC=6U z1>#)Ey!74?=(=s4OfkQGP16+%rwz^EOa$zqV0>bk8moGPiTg`3(?;2N3L*>zd)+gc zUw!;ide6C_>;8xR7?>hfksCSB8DtQ!?lqWhn(^AGB@(h*+DCtX4bK#N-sL#{wb-k|X8k}X;&(|!l?c3VRoIKVmlzVV#zErxCREyH@ zsai3)ouv-;llN#mK9t(MrcPKF!M8n?pZ-AZ`)S=qIq4}A-+%VtC~5e>Ki9p3VBJy9 zbQCFL^|i`DbWR&lmFfvWk3+l z-qN!K*^P5aOd4^-dqKK|C$*kHLy{`a*x zIPd;74EygHVgl2Syn*p?E$c=Af-(;<`7tmtF-?Vl3hgE+8@{lxz)CJ>F*7`q>UqY? 
zY1)~<3BM-1mm_1EX8ODJ==}4mexdLyKFD!oC*SL(Q-V{!XzPWMc-fYJ4MDcf@^Sog)7e;-TE z28&6`*1KF{6!kk6nlD#eY-*}*Qo_)B(y;W~xH!a|n0~Iu$GXl}3#^yuvN|S~A&(HN zX`C6bJL{4l-YNbdsPaA=Th|?7Y-wp~_2i(@H?2d6!M%~l%Ci;FA!EmP0z0-39F%Kp zEobr^^7&8AhCW@$qgYiwR+rx~#(_?EgRS9@&nmg(=43q?an3brY?&P9_Gbh0mnhD$ z8(HRkJq_*nfdl>bk}Hisy~O6MQ)O*iwZE`HU~JYu8_rJuJ(khUy7vVe=%bgb4(=eP zzp6#KU#YQvf>-(;T2M;`#}~!cL^RWy&=NC-7jbBUZGsKh38akZu@n`|PAWp|AFW{K z(NKah+J{^G_~e4!%c_*>7pZO;Iz@#EK+^eG zX&+U00;y@c#z9vv1djzXXhZ2kvyavL_pm4IJ5!E$2Zco>o@=c{1v2+?|GiGR>jSZa zSDx6GeA%u%OD!&8o}W7!kDGyP3MArimb*E~Gz4V)VrsSApzL0mI;$ywWQn0r1cnuU z$RdNUwcE-RJ(^>ob~uwU>eXy_;V1;Bo4`O{R|5Y0obz0}GT&ieS6l9*8y>$~`p$wf zs4Qd5y+*f@2eEmna+Z@CgmrS;bpwQR^NeK&(e$dp;XOYkpJvW&ZEGvM?`~K>QZk+9 zHbC54-XBu9pMfTA&5|GUCb*5()VG|D^#J+qM4{GTfthKZsdd-rT)UKc?L)y`#c1S} zmbe={x;%$p7g5(-cSd+Qi3#c})yB8CDU?mInos*~da$d^lwcv^9X#_7Q@5?aRLpwf zY?m`?Eujs3Ju;jEAhn}Tv1Y1xGJkm8K7V(aq}eJ?$KAJFrv8->fw`$b=V}}K0GfqG z=P%4_kkX}19KpPhtvf7UUxnmPN1%S+>Sso@0*?5#21`$klSvrS-4-+Iv5Ppw(itqz zz)WZPOPnNKFflluv5wx|3Q?@M68-7@3Q4oKhR1&McBAlpL9n0cK-FBgk)F8lW?AT3 z+G&UBsbrA*GD(+xWPoejW#75%(jRdt*U_xrT6$r;`6xb&#sJP&$R8o>Tl+#Agpa(F zjI6w5+5zclGb8gk?fVum+>moE`nB+n3AGGtFl-5V8{M<>)d>r8MaKkg4+@G_%UeF~ zB56WbE(I(#WG>0{@mTTMnnn5)C@+XAS`78x=u>zvT+Shmg&Zd_Te^(sXeURgp)uZX z_EbYvS^nM<g$Ko zCpVdsc1~aMK0Gblsqxqf6CCyDvv@+5$r$M7uAG9C=$=-nhNqjhj<|a(FeFT3p+z-Z ztjwO-jAZ%8!tnJkX?B*YIxK8_h4WaW{?(CjIw2D&;tA((bQ;pR7R#{hO}i-horG== z=F<}JGoF{Sf%!((gw! zo-D-d4n5nDZnBj9n3zv+!^1{0-u6IMo8h2uUwlHbIZm&CP~KB zc;tI=0u@h>X$Q>ScN}AqL`6T~*9Y|^6p}#*+QXQ!a8yG{oij^#ivBlK4)*bnx-=e9 zC)``)uLsb6&*C-OZaIms*8M^f_`Tw>S!u7w#Y5 zYF4ID^C1cuBV#7(ko1{air4i$owT~15!8s+YOL|&VMpi0&X%sXCyejnzMDKMw_E?U z;)yqV<<51yYX<^aq&d@4R}>$ONoi`*#L|3>dDwWqDx|I--!~L{^sJ3#VxPKmDB)Tp z)sXLGIDto?0GsQ4(&$HAx;a;eEms_ITa)A20NU3y?_yZ1<{L}eQScGlj2=$bUi)8@ z1zBC^-TI>cQ6=x(df8#q0sYM9nyIPD7vl9l5q4_Mus*vTkJia`|Mxe`$*evTM5Y^S zKK+2Q*{u?Fh~1r-rsdJ@rLdrPH{V8Mki!13#VB6kxdTqw_ntLJ$K(wcMcJRG10R8*TTvxndR zB`l8AYsT#}FOV-P#9e4P6k64hD{;P^+25xn`LXSW+?%H}Q|Pe>I}Hu7kQOH|0m-Qr zA7Ot4@)D7*N1{Bxll>s^^klK90ky93R*hJya!`tGbEf)5ZJz(+A`pRy01S+DX7pVSe2 zjYGrAmX*UTJlbSflXt)pq5QZXrNe2|V_uMs5I0#Nfo00I62Od|!+V8{uGhRk{Y0Y3 zX>-nXGIDM?R)61|pSBWSFp3t(gE{h10a?JPuaa3>COTqUNNDHi*l}-97viF(oqC%& zjOT6`6;CSa0!8FIiFR9Yy0JswY~gNv=i}2E8Wnk~H988D;I6jgd=KlEr76iWTzR5^ zt&B4vqJXh9ILP;;>0>swKeJ$19ZY7{yIzh%K#qU)){xuW$R+nAG1qnzPKrwjIGq!uZ@)~#Ot%{vZsoL3}@=z;BQ#o zRgXMNVD6u^m6HQf!WIiex-oYX3LaL@OE=T$?SGk_!*-!lrIEUS^X?seH5K9rX?68T z<7lcM_=K(D9#@jyBI?+Lu-&774Czx}2Z6 zCsa1wZr^uUEa%SU^(8bP=r^0MGhh!0qpeYvm)AE*gSjIgV=xk#doRKUSvohuzJSop z&x)m2O8Kc-8{+yGM)FEReko{!M(fJ$vq55+{N2pIo&3a~Ch)7h6AxVtTz}Lik?>PW z$A#6!NCbT{d@|)l;lvBwACzmP_`3Uvnj9+q8OeqYPSK&ks*cijY?-4W#fS`UD~XyK z#>6IGHsWScnxWk*xj8vNwcG_h)F;=GK1D>{!@BvtSIV~jx00Fg(Vtq$!XrmjwdrI| z{GPfv7CE)H0K$@is7#oGh%9{gafq1TCTWR8N=@k2rMq{6CS?>X?E+Ia^EJl7?xEhk zL#6tvOPzm7$&l8a8YBBhS#PEH@Zb)iJESPsZY)3MVOS#-o9K8cp=*LSw@X8Ez|xCcokm{D z>!i^%fVE=cSTVQ#a!UVi2ub7gEp{#^f!Mi3-t82}!$rLq0TpYS_A*w66UMcT;Dwgp zh6l?HqGGLZx*C^``e9w|Xlcb#{MB82i@C+g)yG{g^A)qWfuKaEr@cB}Xp8F$D!PHr zaJQ;oQpJm;vqU%5p}@#~J0>PZqQ3$}&m9qrt{G-bn8QxLTb^ETC8T%*#?3@z1o;FC z*3FR{?gxQ5ER(})^Ne&Y-xf=Yr8UiM=xo-|2AdRZ{TGw3tEV{8{x{(3<45~r5|Owa zk&KxfPhQ$-)$mM32$E#YWA&lZ$fiB^;2Ch$oA3FQo4lr9#gCB%=U~bqI-Eotngo1T zOv;D8v00QDb?vy~M3U1d3Tb$PB_4FQ%q9j zt8JD6=9bN0+?C9%O^qHk9C)=<;>wqjAq@x5C9bz7nX!WES5#_WP5I*1!=1%iUUgzT^{O8$c*kibXlu@hxbOS z=H`Yp^^!!3SuBQiu|_!wq4o&yntOCxy4ZtjD`evK zpovnEEZ@Bo5{gd;WuH%0MAdaJS2Yw?%UlpE4ijFfHBa8Kfkr0erG zqm@a=30C<aVh&RdN955n>vxBzg$eB92FjlNlMkg= z6U8U+OZcRS4VjZ@8fjc_dQVh~o+cB`UI|o)kl^!(sZ}_5X-~|4g#Q=$68a?kphbl8 
zd*}6_dabWXwO(Te4EZA}By}FX#9wCw4C)o>Nk~GD14Iia)YM*VdEJB4>7`wWX;bfV zaj{dv#(bOfC zsmI;qTA93Xi0qnO0dEZUP*{m4^jwv4>5^!Mzb_tc5?Nyg;t%twO z;Rqzb8OUot1RU3|{ay~+VcyqUeveO{sF0+6wL6`3%#{r}JJd_KhqTB^bh3YT>UhWK zr%YgU&c`SY8qh1Jw&>qP?KdX#UK!qR`F*fH%ClKV!O#Ay7W#;xzX9&3L_PY;IF{(nh=Ll6p(b22IokPSny)+_W5z*uBdBJFicS?a)%|H}#aQ_DW!zBOZAr z{pJO4EgpBuBOgDHxK8TEJSG*nwI)s;3WJZNh?O3bi!(flAJ^SF*)DLEt-cR~cr1K2 zkaXANwb3CXJ|ZRH?*i~T_50T%;lJ^ctL>>^Nd5GoT0ds@5CRf(fvoP3@|Sw%4+kuK zvwdj6F2sX?{PpW+vrf6guR>HhjBkcU+?K3ACn#=wM{&f)Wg9LS_j_&1*p#b;bE*2u zWqwriPU?wp56THy1{U`7=F~;@8fV3$!qgu9%757mij=pxI(g>wm#kLis~f?hn}o?> z6Cdk;6y4+(HQ&Q5PHqmsaOLnP(o}C_kXgUX|HNOHJZ|l@a4vMy$TO>ZuErzdq!-89 z^JhH$)ZXp(fWWV*zl9kn@yKuFnHST14%U@F=!9xsty;29L-U1(DZ58qYmX|mQBWkEOo;#73l(B`+hxzQ1<&Yf z@0;JrZZeEZr;)`)M^l<%R;_Nny&n)xntsLY>}X+QTzLJGjgq!D8JU`e#oAy#rgAhi zI(gObcMYJV2yzzkHa)@#bt9@{_oly8Md@Os@9|4&9OE12mo7y(w;Stx#vN^cm%mt` z9j8I@y7y4%jQ$<2pv^DzudGvK&bul4U!GLrd+DPOgB^l?LnrFpYa+Qh=~n8Nxp{fF zI#}NZ1_lCMDOEkzeXE%u_&4zv#jKDDAfazvm|%lN){-m3w5ps zXM4KmdqCoNRrGUYw9d8SL(03cHT;uU7;ysz>%Z6Tm5l?5O2+dDaxenuqd-(~c;O*K zxc&8-y1NAqNH5>A39xy3+d4!bMzRbXOR$_TSG|z(*%k6QsF?~(#YVew`5*o4=hBl0 zEpE4hn`2ju4cuNpo7WT);`ob*)Aw!O^aE|mmTSiPzk)pVyv>HA%+K>XOv-Uke@-_B zblBg5lPW0o8(sq)X>2FnS6ls$qJ@Wk*X}Nj9~Ch@6Kl=nw(7mS<9S?S?(M{{TXW{L zw@I2&gsxp>gQ${mF*Q2D)*gh%#IWZ5OoQU8x4m06MHNAq-?2l*7i4Oy4d7o~Y<8d5 z;-rgFXP6~c1t>?(oIXue-LnO;>m1*-+=ky6b39FKIey)Ms4Eejn zF-*91>^Ex!zE~CxaSg=O?&_NZT7+}k=U^n1q!io!4`Q@LNa0kw;`-&=Vr2q5!v_!7 z%hyZvagUDk7$^-QeM(%?X;N&5D=hO07|i{7t#}lY6TXFPPZFN-P6^L zLDbGnw-#e=eJ_67JWX6R6E1)r=;-*Cu$IzrKG_G-BAmaK~U zq?`_($AOiu!Dx`JWu@?W){r?9^x&3Giz5-|geA4*RZHS=*T$uSM_mJKYYQyGT%4dOXje#N;0o{c_*@JO1 zxy`SuuD)bbW%E;wk}SNn@q)-pznTxGJYf553mcCF^fzYSOuH5#QvQLk`Tct(;#fE) zx{@aYGj(#`l~!#zHvml2`Cqz`tK&)35C{ zilK_bx4K_XFkn8}v^c{l!>MUTLlJUvNfuUsNF7DohBJMN^YfRlAbH42j2Cbf6wI1> zShsuM@%Xi&Stm@?K7^#ECIAq(A4}PqKu8V2uYZ#UU5sgGfDvuJw=3#M0zeZMJ+|4h zJuWbEhz1Scmm5gYVx?KCy*`V0XzCfdkT!TY?Q95p!T_|YXa=K#X!wX(d9BO;wJ znC?XtTKFy_+#t^>3V>1siIT1Vk2SlcLMPoeW2KmHmoU?D8;kpX7__%XBZ4tC?QJ`My4;jCZ`|gcUKY#ak@^`%y^`~3nEvHM&!ZN zdCF*7T(aZK)#~Df3c~ziIcbVEm_nxO6BnS(A+3S(DGN78M%;(Ou}Kcw#UJB6;t7LL z`Ln@qwFdHa<>QF^N!Ou$76D8kg3Yvm@2~+2>Jd^e_2vyLnL@OBlC7f{zdpc`dm9;8 zOOUA-CDSdpo2VJS^2EcQH>LE=Q;gm6JIrN#N6feg4%7M_MzWo#rm_25-{fDfhzK&U zi*Quu2vSjeqkEUm5*9$H<|?q82HjJ6b?dB&xp@|9v}i=m zhK~~#JUw#>w7vd%U>akmSln(1!V1d@_Hc0UTxub^am_v?t&Rpqh~LIvFZ!8GfU3HB zyqz{5Ay{YV2-Hl3S$PdWyB);z50FoEZ2+m$(sHuFv%$Q8V>ro)KLten95)|yJ$HH5 z{_)JV|AF`GDwj)&s9~u(_27yBUi(|HWURQ(yZT4O;QJpCD_?i#NzF zZFwQ;&_S##%AG`&{SRM$${R~5=7t!Ip9~zLg@mh(5(pyr?}LZ~U->T7z3CF$;ERoz z<6i|pHGy~_%QiRE9-vj+*P!<{NhIKW4crE>wGqYz12FCD?ZMzzVM3HYNLT(A%uzCX zX)MOJo%r8hwGJ9vo979;F$r?AH3yU1iTz8UA)l>;chPtcD#q%mloWElqO5~}uyUwW z-vBB$ePUa!4)Yo^Kfhzm;h{w!y+&hWWAQ6e3eVGp*2BX?@EHCe!5w9`zHvvU+>%S88qE!nn5c1Vz8@l%36NFmccl-K%J|IWx1nSc8;a^X6AfU`aC`nYj)~^zIOK@n zubZi$ubY|HZSa^_*tzKJLi?~3*D6=jK0ZkKYh9Y1WC#um(L>$%K83sd0e!*EUh?mv zLb4CL9&NB-+3qg(BDJnV34zlH4uA2o&$f>ezQ*B~N%*-izf2M7;TizYO5WwuM=}8y zCH!3A_2PG5NU5orwuqaaY|RMeRo!XpQ$y;6#DfHa-X4b;IW7oW>`l|^wUw33C+o}0 zY{a3(Umk;)0q)+uBGAfrj+$cGQoBL#Q=mfMYCRx0HaPe|dl;1Z1uCZ1VEzXpnc|AC zfrF+TCt<50`PnE6U9DqZFfi6ox(o2jpMiIKk6-7~P*pxL-QW4M3G8>8{P%vB*~j z;?R(=R{ZR#G&v+4e)XNXwHiPyGGD9Y9QZKUs!Y~PS@cY~*Ts1JIv|It)Jix-*Q)?X zW#JU?uDfP@!YarDsAx#i)t5!J?y3wdjT-_&X4|ef-#p4x{sUev@pE&q*)FQVxfvhM z2Z=9_o5>46I*+XZbg|Zjwwjn>q`dcNNO^6ZIvayy%cx+!c|{~>pGaE+u_bmdSKb#s zD(CD5n{U};|H=__Fp+O35JEgR=vx9+TR8|)oKs}W0LAaHxdxl8x#NJ!yZX15Rr@+G zqI+r3MHHh>Igdwk7lRy%s_kr$uSDv@wDyrli$y@|9rkjX%$r24Rn+qh}*}01F zQSY;=K`;+^@rtNVW^32fDXgU;_+dr6q(NU_@IP$lt3+M3$WF3`Se)ohaVW_xCI@~S 
zR;8u?uA-d5WPE9?fAVY@M+dp1S!O!;`vmj1*4WqqLx%+1tgw)RI-=FTz+}xbR6&nW!2Y}jP`AD!~tj<60s|t}xq)w95agjeZ z=<9iYyIbys&hIbzRt*z{{MmEuvGO8PNIv?rZ&30|&L7c;GR*n}89?+Ij8iIGYlPjs zJ1JS-qy0M@9rhWpbl}lf+0New!B<`*V;?PvmF`={OpBbxG#3;UR7#WQcbV>XwzbVW zQ1z%CJJd`Vb0|bHSv(v05Q~>TxNK9EB=ZoP^qywfr%!Knu(F92J2)@rIE8sM=pMJb z^%-(Hl@Dv-7Y=ML+wsAM)HrPnamq@#d|7oqRE4%~f}6GV8#2}|QI4OXgQjasOX-!* zM~xQS_(xRnRBn9b#C~li^Tk&XkALwlt&;v<(ld=1te^ej^43|2es_?fZ1Sg?ULx*z zs^J-^L#W&CheWG{JAxp4_{XRW_R<_{@H=6g#eA<6BMs#wmNdaOkQ!2Xz6? zpD!XKKB^c{eXaKc5{H*LW(_`_`2YR%f7k{}&`oT0t50f+&qtiQ1G0{4?mD(pGFX zVLQY2%@h~xVhGB`fJQwq#Ij0j+;0Ds00scnOH6Sc3p*0Coof!kh&lf2eN_+$n3e_I z?AUSB)YL5hB>ZHRs=&;MgSgpHE3PA^R3ffRly|?oqWxDshAh!nPQ3rd51W5c_+QsN z6fgZIYtk4l{aes%1GM4h-*8P|>t94&v9pyhwMWKb@H9YD1I-K6lRv@Vsol>oL2mxg z36VJJ996#WU~do6h`%00d|(d)_BZ86;BZ8I7NGa;109e2H0`K|fk7smgpSL4b1!W= zpHnLu7~F4UVTyy!h*HGz1CIjM%O)%_V<^*!(m#|8FGXx`_u-PV- z%sfP~7qkchX`W1k%-zP{hTF=1*;vyF7+*QFe+s9fk?*Fxw(h|2e&xyKvQobK^z}(e zvxoLiB6+d4@jd&?S`5eLz{W<yEUTD|#6{vV z6kYxmvRLq_k|=_C-QCNa=$H4k^L@?nf5cd?7+=Ydh?NXS>iK8&O%^_+tdF-5yIoh; z(SG?Qu2cZXKdmDihUt(}UFAV?k>UOM!~bMaGq&|zabM)dfMmEXkP>ZvlN^7K7R=7| z@W!X#r$?1VJ^3oeMI5tEYcor7cQ-;XfB2O-0vUv`W~U?|_qr?ka1Bl|1z(7~ZF=ppoP*p7-=tPUL9Kh7;Yw`$L0I#l z5Y!j}B|u_sy5f9K;Vn|gg<3(SZt9!!%hFKi1uVhxx2oXMS3oM^0egEo#r@3LFI~F5 z%UkKjvZ-*w((lv(D)XI`51)ukHCL%P(?@Hk=-eeX%owP&ii(Q6N858naVsnULqN?+ z^E@W}e8d}aZi^?$)4SKeW0R6qX95Qlz;r0Z)5w9e4a{KEruV?bOWy5D#I35 z8avbKoJQZ=Hh|*;>=0w^&`)U7|G1Fd_!qYh>KQMJ^jzx0|s^dFSZZlwX#T`a8#mkCi`DfLDBy+sSRi71&1itJz+CmqhpkRSAPZY)Slxw=(1_CF?>$|&65w`Sau?d>lG zrWMw-(Yr;9Cgf~g_FWTa@~3Ja;COsvHR=My#^LUDc}2yjUB{)qPV7*?E_yZ}t! zcJZB^tD~+J-eqq<+c-4M-aC}b)}N{ByGUEG0MDm5x%JR%cX$>`tfs!{eEbsX+<@st z`tzBD-R+!&I|&sjDa|Q0&*RsU$OK-y5S>R};{%iR0b%)tg_huEiZwbXpawOhb`8xT zk3JHpGCt68u3E@2g*`F(NWP=zhSmt`@Olj`Dml| zN$Se-GDr>suL*os$g$oFiXTnOmf_m^9-CG1{10rGwo3>xtWE~EgdN}D%S^o*1iLxd`0#4u!-V9*5F z=IeBOt?otbf!@7KVkNNg;v1TkZSBU~qii+~x%Q(4TVI#eVTaluVIb@*^nToxzrUq8 zkvlL^pJkUgN9q6YncfTD6U`NX}moW~w&tG8Q{ z1=HuTrlq!*F>?1)zLhRjJr=*Nn2ovzK zG?C85Xu9F8ikg}oRA5~-Cts;m*xud_qFR3P1n6mLe5q!FNpoU$iv!F}M+EQW9Ur@> z^H`2-A8t%CYL;_wbJHi<0WeFW!GIqOu}&wLap`{qWooLG{D?mY=^;{prZboWLqqNO za=A)ifZw^FZVV_RGY!RV-^V)(Gr~BVO%aS8jNlc)!B}q6PrZe%9*cDDu>amhqL3*+ zEi68|Bc?5XbAu_CCjT9wSKjMBgB@LMC@%y$wo54eGO(kqsi)l`%H1cgA-Izv&#CkO zwfELhS$$F0=mQ8SpcqK0geV{_Akr-*N=gfgpma*7C?MS>B_$2gk`GFYh;(qqB; zD1H6s98Rj3!osGG=w{yrW9E%F(8#=P_nI#P~0Yb zSh52f;7H(f5tZHb)0#o0-5POFUU_*8Elb~eWNA%OoD`BR?M>FuD3r-bUOah`lcbY= zC4coxQuYUUw7@{0HO5Fvh8JGrdHm|>nSqg}2zsresR??(X+4IGa}rz#%(Kk0%1Ez zeqqTQf4xZ>8sM#_%PLaH4vcy=^dcFsP71iVB8l$H&)pu$!* z6u?I)z0SY3Zg0YM%@ZbDa_tp`->9vvj(2%^EGZrH7Tl?=@7E;~(>mtSy8EoTX^NT9 z2}b|T^8WZasz0^%;H!GA!f*7+B702odTDW5FP-?rn@_g*Sa5UkwB+Q;=V$tl#D_*c z_rFgp_ZaF>p18VA5ac6lyFOu`dlbtRleL8L=QSu*^=iz=gJsXDwQd4IxsWADXc zg)DN0mPnve@igK_7blG^3Xdc>Gl7LAHGUD^q3`;2*?xaQ})Bw&+*Ee7y! 
z045Ul*WcsIR#nh+JBbX*OKLV}h2OkFNpc=5;%k|bHO{?LRzpKpzo~*Lo491t%k~5y zduo)tBEilo)|&?!D8zAyKgNRmnl}=NTD@O>+1X9R^SyhBK#a%ycB}^7jh+OMvH;+b z_SvbLR^QSi>(7%9m-yQcoCOUni9jlR=OCh|5fOWba_~PcKxktuJC*)~QHcif9(jX1 zYR?(o8&;1blat3a;sP{8n=H(^89rJI)^0q{Cj)aU8EtfWnh zKN=ONSNdKh+qYhlzRL_`-8)V9 zGNNk&`nH`f`Vyu_N$oV^d%x@|^UqYf8XG67+VqVB&LAD@OY1d-kAzag!b(TlgAVQZ zL$~%rE24^cZA`^RWRv6KiyPyKDs6yP77m0T=Zy62vX`TL#BwM>8PRvF=g}F%QEF5u zF}}bZj_u(<+J#?A@(6(Hsri3<+e^1sL}hXiW2ciAPqbgQMxI!eJ}r^a&UCUj6y^<$ zQFivPu9eN$nS$8eV`>g*0_0^f8Y=4|YuHc}we06Fc=v)9^2YACH&%^j6=CI2IW=kd zey9t_Z}#O*o0GDi>`CfRC2UUZh<@#u#mq)P6Ce8N64|&WdZ!0GDVPVv-1=&w9F8{~ znszs*`sR=|yeC^d&0FU6U6(`&4Ruvo7`~lSvLwt|l;!;&J${h?lw^*b_bLC_$1cTv zk*6iJ>r+!T`s;7nDAJ@M7KgtOPfORXq~+py;2r<^I#KhZ0>cE%P)t?|r=+HZp?)GE zxc>b8`$Yn_e`r`*l{FVzIt(C3w`Ki8Kbowt^+Cp0B7%05NAY@@3AL$&2x)-Cnyg+= znSZWEim!OXZjg3mf(^A~vozuH%KaU|-G*vHSIfRm(UV!^ZkYW?IPZ*<6rZ_p>{99t zn3D3~;N^ZN_fXtFWkzR1VJ)hJcjH*um+$y)2vm0C2-rw!zAqCW^mko=yd3y}DlVK8!@094IVt4h69ub7ET@G=)P+Z& zaWMX~%mn;gJPpdq@10UYlIyv&N{!Bk(T~!z)Yg0s=`fhYN(iVQTI*iwefdNy1aQ|o zjIn6ml}h<0)bH+$9=QdQv*#nPpfhzU$r?Si5;YR#2vVt_4#Ng_f45+Nr>6O0 zG(K*8-vE(d|2iT{UqLHGf1s_WVEo9_zIUNerc`8AZ7ZX7N=9>YVgQ-|S}2=vDJsid zV?T8H^{g0h(?jjCvzK_k04U*ux21|oynxv>cCZRmfj^n;?fhcz_w-pf@)sGlh?hZcDyzG)8NHudTq&kO0RR|F6O! zpA<7yfTpE>Rn@=AR_v%gn-2olY9fzLXfi9D-TAR$r+ZX#8?ea-_CegcsfHtU06ll=WgQXJ0BJRbB%s7Q|R;UXh1Fb z@(V?%26jgA`qTYL6%%7F&EO?#{q7f0kYn;xU7--ppjGJ&y-iqqpJVT*sgovLjZYPu zNIs~XaG0rw#b>45SL`TQm9(Cbe+e_VyEMSa5?5)9ZJkC@4EM=z^mM=ozLdmXDZ0h3de=18~vnYKN13 z{r%A>Yd1IK*Jl~|`6P6px>z=33W9C4VRRxDylo|FOx<;$Ep~8poRE+}z?ur`LmIh6 zE6{;G=Wwd*{0kQoO*055bX*8}Ba9$)bO+!n=OSD7_kr^p+a>e4Ls{Je?CudM{*C5s zkr@1p*(cOXCS{BiZ(q*mY*2agZz3%w~6kl9W|BXq(<6rrAN< zgbE27g#%S=sFOs3R=n0ux9~Wfjzg^wHq(C8J#2)JA zJ$$l!VLV3a$ps=RT2@-u{gYhtH=`Tmda<7Y+tg#4Tw&`$mGEg1j$1xZs=W;5(*Y;@h|FU6#8 z#lzX#gzWDH_>=x{C$e$>a3|_E`JtpGi2nY>_9iy}rm3$h9=cG%`}_SlM1fPF%m;a+ zg(o%I_7!T|lPbd6ptr33StHkjDq5Cyn5Eh7pJqDIov2nQlr`T{VELn4TbO-xLT zjZ^;M?8Nt)e6PdX^Q@b=NLeDCn5vsW-%5L*R*G`IQ0;PATvkfz?!C<1+z6LP^R=mN zFX@qz2syGseNQUKrJuvU-Y@H*dgFDGEWiEfQ!LF4&fLec>N@{4VuNz^>hy=oNMiku zqX*+8Ulb6C&*E^qOIu(wcou!2kqSU|J^~400AlcAhHdh*n$lYH#(PkKq$vJb52_{M zUhryWq!o}=6;)LwSs4LfTXX$?fbEpa%FnGUN5D{p2_yvJ6dQ0KIW`7Ex_O8~aWYfI zKV*cGG+4jmH)FQzvEL3*{I*%upsSFnTzZmPdV zVRr$MbyG!q0D%6|L+K?>k=1si62+;UwwGaCVS8w4!jYaH{i1;khq62iF51C-814 z#MlGvtmq1uY400vdRur_2^Rn$umG2d#K~%%**HLJcA+vricD9FIsU###SR>9)bj$5 zi$CTA{v1&qj`bU5XwkMw+M30vB2B3CUi{rJv31E$C?3cb3QP)g=P2QLRuTqis0A(kGuNn4|lbm(lAdW zzbPIA;Xd6U#JS9cxf);PijY z*x?wrJ1$dAU(|p9h*;Z~2G*@q&`k()t6=z6v??j2HnejWU52NxP7I##1U~~1bT{N+ zY~Mr)?#g_eWz)yM@u3Beu#PPzF|2Ol_nvd1;xk>%6JH#?P6Kg|q<%=hSoO9(rk{yW z`>ysObJ9Vu?Ha?wR065}j2)|9tkV>Cy%+dnN2Q^0$V~?BLix zwwAswRiCA+_kfYo_t8t~bTLa<{wRC?$@h;spN(Hu;iL+apuxPX2v)P&XfGj>a2&$0 zEs{BIvl#|T2CN9}_v+>1?6sZLNq>9ugLgpa4T_Y@Okht{-_F_hjP1P=KSdC*Sa9c} z?!^Ex1%PIdTpTW(d61TvYC{5&t?iqw+$`k$nh>!HkAfg71qF4@f&3X%kEEj39jj0M zu!D4K=Qdk-$-y){sHsS_XC*^0`Zb;l-$n5Um=kyoB6}YR&tQVyDM~%RPbvI61q#AC z!2{3)Z_83-+B-UqXnqEwstI#xo?_splEXzKoF_8I{%7-buTs^~8Ox^hFieb9|)x5B$%)XQb<7%@|6KJvo z$=F^PUD!=}hb~{tn1I|(~>|0_dDlWY6 z9FmPuvzBfDwsWxKb3uIvtOdRY%;A1!`d>T9sgaS_bJ+Vcdt|w+)L7G;QLmq31}PHyDnc|_es6PKYJ_EOk_%Sm;)c(QZof~b$AA1? 
z_`Ro?(t?ask#H6ZH*7}bk{$5-QBCGY+QLAp7S(2pk^OETeB%oTb=yF5@>A)(X5zGS zRs*hvM8fZ`@@vljzvcZJ@LgK*RG94D%nVXb@wmPGu4+@fU?o`87GhL8(C9IB{^?b- z|61j1JCn@+3-YJajfnLh3`~{&L`9`Si)EOm85@Aj!otgCm>ZZ7MzASR!TA4P;^~jM zPkI7WiKkQ`GbFFNtcqdfHOp6m1_FDxUHqM}%jI@bD4|$mnF3{y%;V$ZK5ZxFwh}`# zGnGtbI=O_L{QRcd?;?7Khxysqf{p0F3T}&+U4TVIee~AV-k$U{RsFX% zzTFQ7v$J}RT*!X7uY+w)VPWWVQtvwI%CokW@zS6=1QRz7vI`^QnYG52MVGxJS4$hO z=V1A6vWIfovF8VxS}}_Z$JGRIRf04WG?>#!gtOAXmQ7|P~u-#3$+cWBr0Pp5h z0k&hY_y&D~#iJ6oZ&wq5CUFXb{@EH455Ajl(Fs^W&T4tRr0g=d7SwP&4$y}|m|BNd z2NHlL;!27&TnR>N+WGf;nCMNUHMil?d(QA1W7}YEkOzmGf1I=dx>Ka9RK+8GVcL;O zh}#YncW~uJVfjJ0rLrd1%I3f9Bku9_S%HoxKM9_v1xqj=Sb~KNpEvYzXjbYRT=In2 zl!HW4Ui#4Ydk6=CcdmVtpiW9o%A_BNbIK`Tss%+^qr@j%$voFDfXZ00YJJ%^zFDd% zso=*~V)X+gQh>^q^A-qK_Wa#JBIFM!f|}+w1=fFYpOtFH`+NLg2c$Vnp8hr9mAXj0 zV2(T;WMC}D-ekPp=vQ=$1`7rnvbMq~8P%?Kyw_6zspi{|U!QrEU&#)>@9poGdU_p> zQU)0(Kjat}Ebhn`e>s!hb%)uO4xbcH6tbxjjTKubNl9-LiyP$AnUKF3mB9K}r=@Y> zRE%XOx23)U)pQc5rqd@~ruq8{m~_E`HmxokG8Hfhjcw!1354i$;aE<94iY;9@hkAK zPg0@>Ok4{|dXnUUM9E1lGPU%};*m;r#1uDBZ%sS5?D@f$!HSS5Sp5M_lO;tE?pJKG zzoRewkJW)t6JJINuNd&Rmz9)AUn|Y{8pSKuZ^E_sn3^lO0~bv>8J)4x@9FsZvS^*( zyliMe5;(p0fBD71fzq*Bm+;!6GmZ9TXsGUxx>rx{n!13i;V!83)?6j3>tzv~vG-Z* zFZ&+33Tynu4B#nWQ1pL(E>@u9ftl8|u(4XNa(lY=P3=Z&le=s_4|P<~#e{z7QNdf7 zyuo923pWrmQs)ge1A-_CSSC@e`UPqk7~rrWjXQmxR=y86LI$kp(o72>Z3gbDSs6SA ztnB?b2QktnOE4&bIQg73a0(Qh<)7pkR7hondb36?xPP3EVgUhUfP)pkltR-K@_grV;pB$W@aW5d7^&(1{2nLnG6PJcp9PV{wTH9rpEJkj1;H_svruK zVrRaZ_DQMuZHd2g<@7Il?Tr8QVCA`iT@jHV_kzjAUL|yULfDYI`4RnXY&5sT{i?@* z$4}u2QmNYWp4X`(Ut%`9{Czn&7yq%PPA>})s7C+q?{UblsW}Fd*NFHA4J8v*IkY$2;FwS;!=_l$ni0l<&B{! znb3z|c1tRaun&3Iii99@R@HvCQ)nKvxc0`5x69;CM1+8p$;(^1y6JcJB0b{Y8F*R% zoGONg<7SyU+p*QOzt0XrEoglcN>dkugZVnWmq2y*QeH4VTGTY8KH*`yCTjP=8z6V?93MOxwMjE|0t#V1@Oqkv2IlXo)eHb4sa{#9)ob|9uAC zCIR=B0G00>30fx+=Eq2h)Fjd=ERZg&HchSN3 z!z4tJ^<9jNDpnqWN(d*p%~0&kl&MblZs;H|y9i+Af)KZ?y0&{d&IDe$TpQWD+9?@- zsPNxDAB4MtZX)9m+wF8m@qfsiy)tqa5)DUB52NsHW-Hk z%`a*4zd67B*HcfGGD`qNf+gfeS~(lQ86c%onq;>9gNGm+i&3(TY*K8-#e&;Kn;Kvu z(_dYMRcMQeW?@lA#p%(K5?jns3w|4J~Jv68nMX@&4Ictjz`4xZ_tDv zmsMvLm)!omgSh%SN~T0hpGR# z;DbNV8aHQFP(RCnS@X-(oMEgedpd&m_?2{L8Gs6RQiz@npmDG8vNc6(FIcRK7>yvj z1pkmBjGHWlr9^JX+P{~bnnn%)Y4fh=bK(ECc=vCX%SSsp8LlF>yko<=v(Sh-kboA= z>Fhu5=2fYA6$O*N4O;Y`&E;oc2Phh&8^VS7gpYpIGc(DKIk{^LgS<`3xS9g!%N~HW z-GaYru7(XLtL%eJP#;(TWiuw3W@PWXum)g15R!Ru2hgHM+>gcqy*I^b;|By5K$eRyPu!M(3Cu zGWpvJZ;#vEj*CnnoAc5G=Z^reZUvFcX`B#PgHHik`egL16Lsja&6!z_ib) zsnI^XezuwM!l`nkyVM6}VB!Br{Z;n2COuvN^$h|7CW5PBd-8kO<9RdZ--(zp>>cZn zx;j*4Ja*VVG^aQmGKWrSs&istrm?GUEqaI9=KbOM1s^YOc zQdqUB>Gdx>2DKcAopeZ2PE7vECv5EO{JJ2)D^7J7{Z~{UFMv$O3L;yVnML728)A2VHpa-nG>(WSyWgV~9U zf&Od|1tIQKh=*bFJ+eBz%~USviu*ZKsE?mVG#wA`r)sNLb5a59ET<3V^^I*QHTXa@>Eh~Tetk&eB5au$-TPVrlv664SO5q5O~+bZ^mW}MXjU{; zOtzLjJ_Du?YPj5ba$p(k_w4PtSqqU-Cu=899v`*~9nKYT6Vuzi5gzwrt@-FDuc*i_ zBqW!3GADPUv>b(FC6{G!G&}ofn7R3mG$ktWp!^rPxSgT1$0e$mKJ7?Q*Fu0Cb**E+{= z_hnMr%=%}iJ#Ej^ME99z^Kp9kTEkLO^vW0#nV0vt3ITOFumyS}j1W0p3=MZKo~_!O zjqTi_(bUkun-X#X0Y7$`dxUU=YdYaC?>IEAq@c&|C zV;g1>GSM?H35#G3$_X7QbFG*3X(!p zH_ozUb2mjm^jhuw<;BBwE*dHgWp5f6UxD&XI0Pz zAklqXf!4y#@=Y2Y9AFQ8==uitDGXLnoI=TpnJC92NlF)GwzVr%nvwBG2RnZHOJKey zm%v{RP+t`fbX}ll9We4}k4t3p$v&A^;x^-lqatX3G&VKqILt&Oo*d4>lx-X9eFGW} zhrQ*1IOdz&=Yyu#<$+B=a@7YrW-5GC?LKRkq1B3>C(bHexq(cer2;N22ZQ9A_5 zjoYO~H@V_dTineG7ObCR5aH!`f3Iz}K10y?oCKd#_{Ng9#0^_{^XW$dp(NR=SFnbQ zxW#!^zS#wvS4%yqxSCyApC4+rH^trita>5#eyWrL(Ss0YAxfe*H*1{tYT8@F4;Oc; zkht!5IU^nlh$*u<{T>$?k~&;eMOHX$&caMPKCTh@K%00tC3Li~b}YD(QfqI&O&N5u zw4teUxMZ$H*qB(x81@bKD06vj4dBAz3LE3ahx%cv%#{{>`8x0s6f_B^rl;!a1WI<5Rp1B%Nf$o(VAlXUB{wd&2;w0xxfURRoGLbcT%p`76H 
zYLeYp1!^m5tjcyPBLA3maC|dHHPQJH#k^|0F}A2(2>E*E`*K zZQ9E1WSTf0=Z^9dyU~jEeLvPP*1poOW5-)VV+nmfe)#oipFmRI?mX5ipjQ*KH@h@n z?zTJKHP@c#X2)h;wLMwV_v!*+N{7>3C%E&I>*l{!7;dpD1>y4^yB-XU3BtZGR8(m> z)g*MhZijWSI+V}v#Pd0kqGp@ZBr(K!(|h1Y$H9c1oX{G7Ygp4^Pd0})YejX+sKZiY z#6#~fL0+f5mRCB?dow$uSgfr3MXcI2&jVdJkT7N>?LvEtzuQ`4V$5qEYj@52AP=V9 zkVCs8Wh^idc9bW3{i=!OsPA;`?1?Vh>WA~WW%&>0t{zP9E$?vIZ{l|FIlO#1DBh`|uGa=X|)k z4goFqISxWNOMGXbIO6;2NM*Wm<7-VVtzke|evf~R=W~?CK(Nh>x}6*m3hqAJCuT67 z8ZG=Fq)(>^|k?<<-^k%W)94 zuE69rwhLlnVfmEWRAV#{^P2ca@@g3@l}}xm2)5khdx?C;STg#l!(b^5-fgJ%;vj7J zvH6#xgL#QY1L_dg4d6FePgL%VI+?FG!_CvJ<$!2<9hBl>KLg6f)J%QYC;O6Dur+^* zH5#}GTLdc$3&jE5+vacIzHKe_rB|lYArM^*6{_GOoNn!_HVrogvChxWZxoGw$lmNJ zOd9C#SJTm1+a`hMik8_~9RWq0FRn)wt@sE;?K?&0YoDMP9YACB622c|*lKQme){Kl z0r+txiIQb46EibGxK$@7%>e?0`yE*1@t~F0(D1+pjfH)BYc#Mo2+L%2G*HD6-g@+` z5;o$Q<4CpB{rx#8w0!bI-v?RGU*N2BDb)B6V?x*{^RJ0!uy)qp<9ce8(O=+WAx#4Z zeh`=;j{o=D(Bts`IUq%O=+{&XgU@vff%p{!6sh*Mwnr#=eTarl<7~8z5FlZ%?i7R) z^e@US99mV{Fv~5NWR##*AC_=DtZB;&j#-qhyA!u={1xCDQU813a7QWrr4>+Bzt-hP zzjh#;KkmE))8E38cn=5Rj%o()_0x~k$jAPF|Mh?FOZ3l#k~mqYZoxbQjDbJh0wIDW z4yJ5fp8;oYZ?A8IZrlLcoScccueaBOiftT5q>JIKM=5#2!}GPfu{Hq+(nvOfB}woe z9keC$dcG-DqVZHiBdf;HHs-|by>r$BM25X4JgeYH zA>3J?hNQ^qS={9$LSId1_V@2!78tV0dVGE$%fk}I`uo;5wkpdhi6Pd_J!fYub@q`5 zV0-}0wwE@4$`uXB+HUSBz{~b8Dv)s#D1MOJ|3IImgV6mLW{W#@^UI<%#XEy?07>>3 z$t4P!mS||YS=!t8_qyCAsIxTw1zCLUv(QWChK4Qlm~86Gtq9V}*BAY`w@UL=t# zPhX!;YPU$LmeQf7smVy}S7tu;Q;)rYrpKtH=t9(dVM%E~Iq zO5&rvUT=Me*l%5Fam#Td33Aek3UYHcAt7W{l~#VA+`ZdHCrRjAj|BGy?(*=~nV+SH zb)B!K$z)Jr#|Xu_i|#GqhqYPC3|C%XFy%OpJKEmctFpmW^4L>!VxcR2ef!9*-rUsG zhHqo^YrYoCj>F{oZ5UF4ZL&u{uI~e-4RPhx-iYgP=UAOL;nWLg)9qjdZ->w(=WSJ< zC({cB>ak`Wa{E|P)H{(F79ltZHfs5Oehs>7=UkqXUNBa9LFxq*MpM(=o!3aC5?&#p zv6JxH9vXpC!a55esI3}V>j^+)}y+*S|a6=*93gNk-3g)q&f@hf!A$pB_sKB__BclLj<0u zXt;24lpmkHcvEDY&OqDZqS0^&O=3GMGxLM|YiC3N0jkj|9w=zB{)`?%?#faM|F<)h zqd4#Ru_T2S2&sC8<chW{BlN-2_TDp1|?7lR4uk6f2^YjIbiCkiDzUvOng z{boDDoB9YNRb^JW%^8D~66ha$j{s~-v^Qu;k5fpJpO1^`3!*wZJ2y9V9!5Pm(;_{T zKLo702haJpz2qn#v|bDPHr=RDy0C54V{|W}EV^|DX19~cMe|;O?QB{M_Ts=_QEtyom{=5ph5aknb@xSeVmtxs>7hi>HvDi&w&{f(Nd&a!DFL-w%TqaHL z<6`{RpEjo#f(Owy%2s9wFiB!sV5L36Ac!|26ZR!2So!_Tlr)U-GWgHxRm|%Qtr(b& zFnck^IP6Qiw;WpQYA?>!Q~+jvJq<)u_F`!$AP7D4h|}4}_n;bh$L1i}{Yx5-6I6e$ zh@dGfEhUx{^8a8%a9GiDw!?rE##xSl?ydA{X`+4D@N9j#tBY-tN$i{G>vakj(36zZ zt3^7vx)T3_CyE4?*Uz4u3G8QQU}+X&JEtk~JdE9vNdr)M5;}b^68SY~v8eL0*B3|r zpmVg zvpP8NkWs)Z>kWi)l1a1L&In$1yp*f!0$N6&R>TJbjX2Sh3taD~q=UgrnI2yuQELC8 zAj1(x9A!Ky2DeU!bJ?z@t6-wgL&8Qi!9?>Cji$yTkYxtpxY36!jWrll|#`sDtE00Yy`v2aUB>K*}RT%s}eLHVlsL2(ZtIPn?YR0PE8gF!C!>U>v z+3y$IIwP?uIwb_Wl3MZXzNHq?K3H=Fm3r18F)=aIwr?Y>Q<2dqT#LuB9don7nCN6O zMYhf`M11c`motVfyz3CQF^pMeTWfnqv6hFX>Azk)cMmxaLti^D?RIy8@Yy{4sNyXS z{s!HGoi8gZD>jkef7+azSIq1-Dgn={jVMdiBaGr3D-UJ@N+YR`!moJ?qHjl!On?8H zz@ORVhu+yq&U`&QbUu9Gb|$S*tz1i6L&Nu^WWeM?RrYd7((Ht@#$Wj{9I71+N*PXb9>+UV|bebHF4oH`j(Q+BNRHtErm_kXapg=Lq#atnzGzA{&sf3wf37U0E5aap4<$VxOv#-GI(GG;&KJ?47)X zFrtx5jOTe6Vo&#+6h{Q+d>xxJ>__YoH^8+GS29^OXq(t;0$gpYwaxJrV23V3afhGY znB1$Nzc;qx)!#tst{~&;`h+3E@5VvVYbjPDymZH0LF5h?djns^ln&;$Kw!D`fj1## z!s2yksGEB{=<87(MD1`B_z{O*=&jVcC4Yw`;dsBJw0ofTfX8W?$MiJjKBSXCc#YR@ z`FhhwwZ@Zjz?)eU4DR?~bR_u^9vc85n5}b4`vXis3qDvbc=Y<_NGaK=(Vl;Lkz_)b z1UmSXu$yiwA)<*OCJ1)^&6sM#aIG~UA2Xet@cG7O!bz_1cK3_9Q@2mDJiYkSJNv_O z@Yr0jPLjz2s8xa(=LM>>$?VTj^}ylV>YDGRsnN)#?K2G&^4QtlPR=9mxG;6aXV4|i z9#4!6y;Yh7JlS88ES6BMnovJJxr$h+>LLyaD8y@=E2Prqi6-ju)(~r)F z@Fn!e|4!Xug`R#$vHm-R{r~(nMr(Wf`3=Z);UoC*>c7Llzbj_KK5~K!Ho+UDM!#mz P-zNRwv3Q>7GmrlV6Gz-% literal 0 HcmV?d00001 diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py index 
be931e8c260..5c4bc835e5c 100644 --- a/torchrl/objectives/common.py +++ b/torchrl/objectives/common.py @@ -284,7 +284,8 @@ def _target_param_getter(self, network_name): value_to_set = getattr( self, "_sep_".join(["_target_" + network_name, *key]) ) - target_params.set(key, value_to_set) + # _set is faster bc is bypasses the checks + target_params._set(key, value_to_set) return target_params else: params = getattr(self, param_name) diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 04aef9d0aa2..d8bce805487 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -110,9 +110,11 @@ class Trainer: displayed using tqdm. If tqdm is not installed, this option won't have any effect. Default is :obj:`True` seed (int, optional): Seed to be used for the collector, pytorch and - numpy. Default is 42. + numpy. Default is ``None``. save_trainer_interval (int, optional): How often the trainer should be - saved to disk. Default is 10000. + saved to disk, in frame count. Default is 10000. + log_interval (int, optional): How often the values should be logged, + in frame count. Default is 10000. save_trainer_file (path, optional): path where to save the trainer. Default is None (no saving) """ @@ -124,7 +126,6 @@ def __new__(cls, *args, **kwargs): cls._collected_frames: int = 0 cls._last_log: Dict[str, Any] = {} cls._last_save: int = 0 - cls._log_interval: int = 10000 cls.collected_frames = 0 cls._app_state = None return super().__new__(cls) @@ -142,8 +143,9 @@ def __init__( clip_grad_norm: bool = True, clip_norm: float = None, progress_bar: bool = True, - seed: int = 42, + seed: int = None, save_trainer_interval: int = 10000, + log_interval: int=10000, save_trainer_file: Optional[Union[str, pathlib.Path]] = None, ) -> None: @@ -154,9 +156,12 @@ def __init__( self.optimizer = optimizer self.logger = logger + self._log_interval = log_interval + # seeding self.seed = seed - self.set_seed() + if seed is not None: + self.set_seed() # constants self.optim_steps_per_batch = optim_steps_per_batch diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 956721e10b7..3583aaf01e8 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -1,696 +1,703 @@ -if __name__ == "__main__": - # -*- coding: utf-8 -*- - """ - TorchRL trainer: A DQN example - ============================== - **Author**: `Vincent Moens `_ - - """ - - ############################################################################## - # TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle - # your training loop. The trainer executes a nested loop where the outer loop - # is the data collection and the inner loop consumes this data or some data - # retrieved from the replay buffer to train the model. - # At various points in this training loop, hooks can be attached and executed at - # given intervals. - # - # In this tutorial, we will be using the trainer class to train a DQN algorithm - # to solve the CartPole task from scratch. - # - # Main takeaways: - # - # - Building a trainer with its essential components: data collector, loss - # module, replay buffer and optimizer. - # - Adding hooks to a trainer, such as loggers, target network updaters and such. - # - # The trainer is fully customisable and offers a large set of functionalities. - # The tutorial is organised around its construction. 
- # We will be detailing how to build each of the components of the library first, - # and then put the pieces together using the :class:`torchrl.trainers.Trainer` - # class. - # - # Along the road, we will also focus on some other aspects of the library: - # - # - how to build an environment in TorchRL, including transforms (e.g. data - # normalization, frame concatenation, resizing and turning to grayscale) - # and parallel execution. Unlike what we did in the - # `DDPG tutorial `_, we - # will normalize the pixels and not the state vector. - # - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor - # that estimates the action values and picks up the action with the highest - # estimated return; - # - how to collect data from your environment efficiently and store them - # in a replay buffer; - # - how to use multi-step, a simple preprocessing step for off-policy algorithms; - # - and finally how to evaluate your model. - # - # **Prerequisites**: We encourage you to get familiar with torchrl through the - # `PPO tutorial `_ first. - # - # DQN - # --- - # - # DQN (`Deep Q-Learning `_) was - # the founding work in deep reinforcement learning. - # - # On a high level, the algorithm is quite simple: Q-learning consists in - # learning a table of state-action values in such a way that, when - # encountering any particular state, we know which action to pick just by - # searching for the one with the highest value. This simple setting - # requires the actions and states to be - # discrete, otherwise a lookup table cannot be built. - # - # DQN uses a neural network that encodes a map from the state-action space to - # a value (scalar) space, which amortizes the cost of storing and exploring all - # the possible state-action combinations: if a state has not been seen in the - # past, we can still pass it in conjunction with the various actions available - # through our neural network and get an interpolated value for each of the - # actions available. - # - # We will solve the classic control problem of the cart pole. From the - # Gymnasium doc from where this environment is retrieved: - # - # | A pole is attached by an un-actuated joint to a cart, which moves along a - # | frictionless track. The pendulum is placed upright on the cart and the goal - # | is to balance the pole by applying forces in the left and right direction - # | on the cart. - # - # .. figure:: /_static/img/cartpole_demo.gif - # :alt: Cart Pole - # - # We do not aim at giving a SOTA implementation of the algorithm, but rather - # to provide a high-level illustration of TorchRL features in the context - # of this algorithm. 
- - # sphinx_gallery_start_ignore - import warnings - - warnings.filterwarnings("ignore") - # sphinx_gallery_end_ignore - - import os - import uuid - - import torch - from torch import nn - from torchrl.collectors import MultiaSyncDataCollector - from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer - from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter - from torchrl.envs.libs.gym import GymEnv - from torchrl.envs.transforms import ( - CatFrames, - Compose, - GrayScale, - ObservationNorm, - Resize, - ToTensorImage, - TransformedEnv, - ) - from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor - - from torchrl.objectives import DQNLoss, SoftUpdate - from torchrl.record.loggers.csv import CSVLogger - from torchrl.trainers import ( - LogReward, - Recorder, - ReplayBufferTrainer, - Trainer, - UpdateWeights, - ) - - def is_notebook() -> bool: - try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython - else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter - - ############################################################################### - # Let's get started with the various pieces we need for our algorithm: - # - # - An environment; - # - A policy (and related modules that we group under the "model" umbrella); - # - A data collector, which makes the policy play in the environment and - # delivers training data; - # - A replay buffer to store the training data; - # - A loss module, which computes the objective function to train our policy - # to maximise the return; - # - An optimizer, which performs parameter updates based on our loss. - # - # Additional modules include a logger, a recorder (executes the policy in - # "eval" mode) and a target network updater. With all these components into - # place, it is easy to see how one could misplace or misuse one component in - # the training script. The trainer is there to orchestrate everything for you! - # - # Building the environment - # ------------------------ - # - # First let's write a helper function that will output an environment. As usual, - # the "raw" environment may be too simple to be used in practice and we'll need - # some data transformation to expose its output to the policy. - # - # We will be using five transforms: - # - # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; - # - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 - # tensor in a floating point tensor in the ``[0, 1]`` space with shape - # ``[C, W, H]``; - # - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; - # - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; - # - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; - # - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of - # successive frames (``N=4``) in a single tensor along the channel dimension. - # This is useful as a single image does not carry information about the - # motion of the cartpole. Some memory about past observations and actions - # is needed, either via a recurrent neural network or using a stack of - # frames. - # - :class:`torchrl.envs.ObservationNorm` which will normalize our observations - # given some custom summary statistics. 
- # - # In practice, our environment builder has two arguments: - # - # - ``parallel``: determines whether multiple environments have to be run in - # parallel. We stack the transforms after the - # :class:`torchrl.envs.ParallelEnv` to take advantage - # of vectorization of the operations on device, although this would - # technically work with every single environment attached to its own set of - # transforms. - # - ``obs_norm_sd`` will contain the normalizing constants for - # the :class:`torchrl.envs.ObservationNorm` transform. - # - - def make_env( - parallel=False, - obs_norm_sd=None, - ): - if obs_norm_sd is None: - obs_norm_sd = {"standard_normal": True} - if parallel: - base_env = ParallelEnv( - num_workers, - EnvCreator( - lambda: GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, - ) - ), - ) +# -*- coding: utf-8 -*- +""" +TorchRL trainer: A DQN example +============================== +**Author**: `Vincent Moens `_ + +""" + +############################################################################## +# TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle +# your training loop. The trainer executes a nested loop where the outer loop +# is the data collection and the inner loop consumes this data or some data +# retrieved from the replay buffer to train the model. +# At various points in this training loop, hooks can be attached and executed at +# given intervals. +# +# In this tutorial, we will be using the trainer class to train a DQN algorithm +# to solve the CartPole task from scratch. +# +# Main takeaways: +# +# - Building a trainer with its essential components: data collector, loss +# module, replay buffer and optimizer. +# - Adding hooks to a trainer, such as loggers, target network updaters and such. +# +# The trainer is fully customisable and offers a large set of functionalities. +# The tutorial is organised around its construction. +# We will be detailing how to build each of the components of the library first, +# and then put the pieces together using the :class:`torchrl.trainers.Trainer` +# class. +# +# Along the road, we will also focus on some other aspects of the library: +# +# - how to build an environment in TorchRL, including transforms (e.g. data +# normalization, frame concatenation, resizing and turning to grayscale) +# and parallel execution. Unlike what we did in the +# `DDPG tutorial `_, we +# will normalize the pixels and not the state vector. +# - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor +# that estimates the action values and picks up the action with the highest +# estimated return; +# - how to collect data from your environment efficiently and store them +# in a replay buffer; +# - how to use multi-step, a simple preprocessing step for off-policy algorithms; +# - and finally how to evaluate your model. +# +# **Prerequisites**: We encourage you to get familiar with torchrl through the +# `PPO tutorial `_ first. +# +# DQN +# --- +# +# DQN (`Deep Q-Learning `_) was +# the founding work in deep reinforcement learning. +# +# On a high level, the algorithm is quite simple: Q-learning consists in +# learning a table of state-action values in such a way that, when +# encountering any particular state, we know which action to pick just by +# searching for the one with the highest value. This simple setting +# requires the actions and states to be +# discrete, otherwise a lookup table cannot be built. 
+# +# DQN uses a neural network that encodes a map from the state-action space to +# a value (scalar) space, which amortizes the cost of storing and exploring all +# the possible state-action combinations: if a state has not been seen in the +# past, we can still pass it in conjunction with the various actions available +# through our neural network and get an interpolated value for each of the +# actions available. +# +# We will solve the classic control problem of the cart pole. From the +# Gymnasium doc from where this environment is retrieved: +# +# | A pole is attached by an un-actuated joint to a cart, which moves along a +# | frictionless track. The pendulum is placed upright on the cart and the goal +# | is to balance the pole by applying forces in the left and right direction +# | on the cart. +# +# .. figure:: /_static/img/cartpole_demo.gif +# :alt: Cart Pole +# +# We do not aim at giving a SOTA implementation of the algorithm, but rather +# to provide a high-level illustration of TorchRL features in the context +# of this algorithm. + +# sphinx_gallery_start_ignore +import warnings + +warnings.filterwarnings("ignore") +# sphinx_gallery_end_ignore + +import os +import uuid + +import torch +from torch import nn +from torchrl.collectors import MultiaSyncDataCollector +from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer +from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter +from torchrl.envs.libs.gym import GymEnv +from torchrl.envs.transforms import ( + CatFrames, + Compose, + GrayScale, + ObservationNorm, + Resize, + ToTensorImage, + TransformedEnv, +) +from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor + +from torchrl.objectives import DQNLoss, SoftUpdate +from torchrl.record.loggers.csv import CSVLogger +from torchrl.trainers import ( + LogReward, + Recorder, + ReplayBufferTrainer, + Trainer, + UpdateWeights, +) + +def is_notebook() -> bool: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return False # Terminal running IPython else: - base_env = GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, - ) - - env = TransformedEnv( - base_env, - Compose( - StepCounter(), # to count the steps of each trajectory - ToTensorImage(), - RewardScaling(loc=0.0, scale=0.1), - GrayScale(), - Resize(64, 64), - CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **obs_norm_sd), + return False # Other type (?) + except NameError: + return False # Probably standard Python interpreter + +############################################################################### +# Let's get started with the various pieces we need for our algorithm: +# +# - An environment; +# - A policy (and related modules that we group under the "model" umbrella); +# - A data collector, which makes the policy play in the environment and +# delivers training data; +# - A replay buffer to store the training data; +# - A loss module, which computes the objective function to train our policy +# to maximise the return; +# - An optimizer, which performs parameter updates based on our loss. +# +# Additional modules include a logger, a recorder (executes the policy in +# "eval" mode) and a target network updater. With all these components into +# place, it is easy to see how one could misplace or misuse one component in +# the training script. 
The trainer is there to orchestrate everything for you! +# +# Building the environment +# ------------------------ +# +# First let's write a helper function that will output an environment. As usual, +# the "raw" environment may be too simple to be used in practice and we'll need +# some data transformation to expose its output to the policy. +# +# We will be using five transforms: +# +# - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; +# - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 +# tensor in a floating point tensor in the ``[0, 1]`` space with shape +# ``[C, W, H]``; +# - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; +# - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; +# - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; +# - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of +# successive frames (``N=4``) in a single tensor along the channel dimension. +# This is useful as a single image does not carry information about the +# motion of the cartpole. Some memory about past observations and actions +# is needed, either via a recurrent neural network or using a stack of +# frames. +# - :class:`torchrl.envs.ObservationNorm` which will normalize our observations +# given some custom summary statistics. +# +# In practice, our environment builder has two arguments: +# +# - ``parallel``: determines whether multiple environments have to be run in +# parallel. We stack the transforms after the +# :class:`torchrl.envs.ParallelEnv` to take advantage +# of vectorization of the operations on device, although this would +# technically work with every single environment attached to its own set of +# transforms. +# - ``obs_norm_sd`` will contain the normalizing constants for +# the :class:`torchrl.envs.ObservationNorm` transform. +# + +def make_env( + parallel=False, + obs_norm_sd=None, +): + if obs_norm_sd is None: + obs_norm_sd = {"standard_normal": True} + if parallel: + base_env = ParallelEnv( + num_workers, + EnvCreator( + lambda: GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, + ) ), ) - return env - - ############################################################################### - # Compute normalizing constants - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # To normalize images, we don't want to normalize each pixel independently - # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` - # shaped set of normalizing constants (loc and scale parameters). - # We will be using the ``reduce_dim`` argument - # of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which - # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that - # not all dimensions disappear in the process: - # - - def get_norm_stats(): - test_env = make_env() - test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) - ) - obs_norm_sd = test_env.transform[-1].state_dict() - # let's check that normalizing constants have a size of ``[C, 1, 1]`` where - # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). 
- print("state dict of the observation norm:", obs_norm_sd) - return obs_norm_sd - - ############################################################################### - # Building the model (Deep Q-network) - # ----------------------------------- - # - # The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` - # object which is a simple CNN followed by a two-layer MLP. The only trick used - # here is that the action values (i.e. left and right action value) are - # computed using - # - # .. math:: - # - # val = b(obs) + v(obs) - \mathbb{E}[v(obs)] - # - # where :math:`b` is a :math:`\# obs \rightarrow 1` function and :math:`v` is a - # :math:`\# obs \rightarrow num_actions` function. - # - # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, - # which will read the state-action - # values, pick up the one with the maximum value and write all those results - # in the input :class:`tensordict.TensorDict`. - # - - def make_model(dummy_env): - cnn_kwargs = { - "num_cells": [32, 64, 64], - "kernel_sizes": [6, 4, 3], - "strides": [2, 2, 1], - "activation_class": nn.ELU, - # This can be used to reduce the size of the last layer of the CNN - # "squeeze_output": True, - # "aggregator_class": nn.AdaptiveAvgPool2d, - # "aggregator_kwargs": {"output_size": (1, 1)}, - } - mlp_kwargs = { - "depth": 2, - "num_cells": [ - 64, - 64, - ], - "activation_class": nn.ELU, - } - net = DuelingCnnDQNet( - dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs - ).to(device) - net.value[-1].bias.data.fill_(init_bias) - - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to( - device - ) - # init actor: because the model is composed of lazy conv/linear layers, - # we must pass a fake batch of data through it to instantiate them. - tensordict = dummy_env.fake_tensordict() - actor(tensordict) - - # we wrap our actor in an EGreedyWrapper for data collection - actor_explore = EGreedyWrapper( - actor, - annealing_num_steps=total_frames, - eps_init=eps_greedy_val, - eps_end=eps_greedy_val_env, - ) - - return actor, actor_explore - - ############################################################################### - # Collecting and storing data - # --------------------------- - # - # Replay buffers - # ~~~~~~~~~~~~~~ - # - # Replay buffers play a central role in off-policy RL algorithms such as DQN. - # They constitute the dataset we will be sampling from during training. - # - # Here, we will use a regular sampling strategy, although a prioritized RB - # could improve the performance significantly. - # - # We place the storage on disk using - # :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This - # storage is created in a lazy manner: it will only be instantiated once the - # first batch of data is passed to it. - # - # The only requirement of this storage is that the data passed to it at write - # time must always have the same shape. - - def get_replay_buffer(buffer_size, n_optim, batch_size): - replay_buffer = TensorDictReplayBuffer( - batch_size=batch_size, - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, - ) - return replay_buffer - - ############################################################################### - # Data collector - # ~~~~~~~~~~~~~~ - # - # As in `PPO ` and - # `DDPG `, we will be using - # a data collector as a dataloader in the outer loop. 
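To make the "dataloader" analogy concrete, here is a minimal, self-contained sketch of iterating over a collector (it uses a single-process :class:`torchrl.collectors.SyncDataCollector` and a ``RandomPolicy`` standing in for the trained actor instead of the multi-collector setup built in this tutorial; the frame counts are arbitrary)::

    from torchrl.collectors import SyncDataCollector
    from torchrl.collectors.collectors import RandomPolicy
    from torchrl.envs.libs.gym import GymEnv

    env = GymEnv("CartPole-v1")
    collector = SyncDataCollector(
        env,
        RandomPolicy(env.action_spec),
        frames_per_batch=32,  # number of frames delivered at each iteration
        total_frames=320,     # stop after 10 batches
    )
    for batch in collector:
        # each ``batch`` is a TensorDict holding ``frames_per_batch`` collected frames
        print(batch.batch_size)
    collector.shutdown()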
- # - # We choose the following configuration: we will be running a series of - # parallel environments synchronously in parallel in different collectors, - # themselves running in parallel but asynchronously. - # The advantage of this configuration is that we can balance the amount of - # compute that is executed in batch with what we want to be executed - # asynchronously. We encourage the reader to experiment how the collection - # speed is impacted by modifying the number of collectors (ie the number of - # environment constructors passed to the collector) and the number of - # environment executed in parallel in each collector (controlled by the - # ``num_workers`` hyperparameter). - # - # When building the collector, we can choose on which device we want the - # environment and policy to execute the operations through the ``device`` - # keyword argument. The ``storing_devices`` argument will modify the - # location of the data being collected: if the batches that we are gathering - # have a considerable size, we may want to store them on a different location - # than the device where the computation is happening. For asynchronous data - # collectors such as ours, different storing devices mean that the data that - # we collect won't sit on the same device each time, which is something that - # out training loop must account for. For simplicity, we set the devices to - # the same value for all sub-collectors. - - def get_collector( - obs_norm_sd, - num_collectors, - actor_explore, - frames_per_batch, - total_frames, - device, - ): - data_collector = MultiaSyncDataCollector( - [ - make_env(parallel=True, obs_norm_sd=obs_norm_sd), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, - total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. Below is an example of - # heterogeneous devices + else: + base_env = GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, device=device, - storing_device=device, - split_trajs=False, - postproc=MultiStep(gamma=gamma, n_steps=5), ) - return data_collector - - ############################################################################### - # Loss function - # ------------- - # - # Building our loss function is straightforward: we only need to provide - # the model and a bunch of hyperparameters to the DQNLoss class. - # - # Target parameters - # ~~~~~~~~~~~~~~~~~ - # - # Many off-policy RL algorithms use the concept of "target parameters" when it - # comes to estimate the value of the next state or state-action pair. - # The target parameters are lagged copies of the model parameters. Because - # their predictions mismatch those of the current model configuration, they - # help learning by putting a pessimistic bound on the value being estimated. - # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous - # in similar algorithms. - # - - def get_loss_module(actor, gamma): - loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) - target_updater = SoftUpdate(loss_module) - return loss_module, target_updater - - ############################################################################### - # Hyperparameters - # --------------- - # - # Let's start with our hyperparameters. The following setting should work well - # in practice, and the performance of the algorithm should hopefully not be - # too sensitive to slight variations of these. 
- - device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - - ############################################################################### - # Optimizer - # ~~~~~~~~~ - - # the learning rate of the optimizer - lr = 2e-3 - # weight decay - wd = 1e-5 - # the beta parameters of Adam - betas = (0.9, 0.999) - # Optimization steps per batch collected (aka UPD or updates per data) - n_optim = 8 - - ############################################################################### - # DQN parameters - # ~~~~~~~~~~~~~~ - # gamma decay factor - gamma = 0.99 - - ############################################################################### - # Smooth target network update decay parameter. - # This loosely corresponds to a 1/tau interval with hard target network - # update - tau = 0.02 - - ############################################################################### - # Data collection and replay buffer - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Values to be used for proper training have been commented. - # - # Total frames collected in the environment. In other implementations, the - # user defines a maximum number of episodes. - # This is harder to do with our data collectors since they return batches - # of N collected frames, where N is a constant. - # However, one can easily get the same restriction on number of episodes by - # breaking the training loop when a certain number - # episodes has been collected. - total_frames = 4096 # 500000 - - ############################################################################### - # Random frames used to initialize the replay buffer. - init_random_frames = 100 # 1000 - - ############################################################################### - # Frames in each batch collected. - frames_per_batch = 32 # 128 - - ############################################################################### - # Frames sampled from the replay buffer at each optimization step - batch_size = 32 # 256 - - ############################################################################### - # Size of the replay buffer in terms of frames - buffer_size = min(total_frames, 100000) - - ############################################################################### - # Number of environments run in parallel in each data collector - num_workers = 2 # 8 - num_collectors = 2 # 4 - - ############################################################################### - # Environment and exploration - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # We set the initial and final value of the epsilon factor in Epsilon-greedy - # exploration. - # Since our policy is deterministic, exploration is crucial: without it, the - # only source of randomness would be the environment reset. - - eps_greedy_val = 0.1 - eps_greedy_val_env = 0.005 - - ############################################################################### - # To speed up learning, we set the bias of the last layer of our value network - # to a predefined value (this is not mandatory) - init_bias = 2.0 - - ############################################################################### - # .. note:: - # For fast rendering of the tutorial ``total_frames`` hyperparameter - # was set to a very low number. To get a reasonable performance, use a greater - # value e.g. 
500000 - # - - ############################################################################### - # Building a Trainer - # ------------------ - # - # TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the - # following keyword-only arguments: - # - # - ``collector`` - # - ``loss_module`` - # - ``optimizer`` - # - ``logger``: A logger can be - # - ``total_frames``: this parameter defines the lifespan of the trainer. - # - ``frame_skip``: when a frame-skip is used, the collector must be made - # aware of it in order to accurately count the number of frames - # collected etc. Making the trainer aware of this parameter is not - # mandatory but helps to have a fairer comparison between settings where - # the total number of frames (budget) is fixed but the frame-skip is - # variable. - - stats = get_norm_stats() - test_env = make_env(parallel=False, obs_norm_sd=stats) - # Get model - actor, actor_explore = make_model(test_env) - loss_module, target_net_updater = get_loss_module(actor, gamma) - target_net_updater.init_() - - collector = get_collector( - stats, num_collectors, actor_explore, frames_per_batch, total_frames, device + + env = TransformedEnv( + base_env, + Compose( + StepCounter(), # to count the steps of each trajectory + ToTensorImage(), + RewardScaling(loc=0.0, scale=0.1), + GrayScale(), + Resize(64, 64), + CatFrames(4, in_keys=["pixels"], dim=-3), + ObservationNorm(in_keys=["pixels"], **obs_norm_sd), + ), ) - optimizer = torch.optim.Adam( - loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas + return env + +############################################################################### +# Compute normalizing constants +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# To normalize images, we don't want to normalize each pixel independently +# with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` +# shaped set of normalizing constants (loc and scale parameters). +# We will be using the ``reduce_dim`` argument +# of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which +# dimensions must be reduced, and the ``keep_dims`` parameter to ensure that +# not all dimensions disappear in the process: +# + +def get_norm_stats(): + test_env = make_env() + test_env.transform[-1].init_stats( + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) ) - exp_name = f"dqn_exp_{uuid.uuid1()}" - logger = CSVLogger(exp_name=exp_name, log_dir="./") - - trainer = Trainer( - collector=collector, - total_frames=total_frames, - frame_skip=1, - loss_module=loss_module, - optimizer=optimizer, - logger=logger, - optim_steps_per_batch=n_optim, + obs_norm_sd = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). + print("state dict of the observation norm:", obs_norm_sd) + return obs_norm_sd + +############################################################################### +# Building the model (Deep Q-network) +# ----------------------------------- +# +# The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` +# object which is a simple CNN followed by a two-layer MLP. The only trick used +# here is that the action values (i.e. left and right action value) are +# computed using +# +# .. 
math:: +# +# val = b(obs) + v(obs) - \mathbb{E}[v(obs)] +# +# where :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a +# :math:`\mathbb{R}^n \rightarrow \mathbb{R}^m` function, for +# :math:`n = \# obs` and :math:`m = \# actions`. +# +# Our network is wrapped in a :class:`torchrl.modules.QValueActor`, +# which will read the state-action +# values, pick up the one with the maximum value and write all those results +# in the input :class:`tensordict.TensorDict`. +# + +def make_model(dummy_env): + cnn_kwargs = { + "num_cells": [32, 64, 64], + "kernel_sizes": [6, 4, 3], + "strides": [2, 2, 1], + "activation_class": nn.ELU, + # This can be used to reduce the size of the last layer of the CNN + # "squeeze_output": True, + # "aggregator_class": nn.AdaptiveAvgPool2d, + # "aggregator_kwargs": {"output_size": (1, 1)}, + } + mlp_kwargs = { + "depth": 2, + "num_cells": [ + 64, + 64, + ], + "activation_class": nn.ELU, + } + net = DuelingCnnDQNet( + dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs + ).to(device) + net.value[-1].bias.data.fill_(init_bias) + + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to( + device + ) + # init actor: because the model is composed of lazy conv/linear layers, + # we must pass a fake batch of data through it to instantiate them. + tensordict = dummy_env.fake_tensordict() + actor(tensordict) + + # we wrap our actor in an EGreedyWrapper for data collection + actor_explore = EGreedyWrapper( + actor, + annealing_num_steps=total_frames, + eps_init=eps_greedy_val, + eps_end=eps_greedy_val_env, ) - ############################################################################### - # Registering hooks - # ~~~~~~~~~~~~~~~~~ - # - # Registering hooks can be achieved in two separate ways: - # - # - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` - # method is the first choice. One just needs to provide the trainer as input - # and the hook will be registered with a default name at a default location. - # For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` - # requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which - # can be cumbersome to implement. - buffer_hook = ReplayBufferTrainer( - get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), - flatten_tensordicts=True, + return actor, actor_explore + +############################################################################### +# Collecting and storing data +# --------------------------- +# +# Replay buffers +# ~~~~~~~~~~~~~~ +# +# Replay buffers play a central role in off-policy RL algorithms such as DQN. +# They constitute the dataset we will be sampling from during training. +# +# Here, we will use a regular sampling strategy, although a prioritized RB +# could improve the performance significantly. +# +# We place the storage on disk using +# :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This +# storage is created in a lazy manner: it will only be instantiated once the +# first batch of data is passed to it. +# +# The only requirement of this storage is that the data passed to it at write +# time must always have the same shape. 
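As a minimal, standalone illustration of this constraint (separate from the ``get_replay_buffer`` helper below; the keys and shapes used here are arbitrary), the structure of the first batch written to the buffer is the one the memory-mapped storage will expect for every subsequent write::

    import torch
    from tensordict.tensordict import TensorDict
    from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer

    rb = TensorDictReplayBuffer(
        storage=LazyMemmapStorage(1000),  # instantiated lazily, on the first write
        batch_size=32,
    )
    fake_data = TensorDict(
        {"pixels": torch.zeros(128, 4, 64, 64), "reward": torch.zeros(128, 1)},
        batch_size=[128],
    )
    rb.extend(fake_data)  # this first call fixes the expected keys and shapes
    sample = rb.sample()  # a TensorDict with batch_size [32]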
+ +def get_replay_buffer(buffer_size, n_optim, batch_size): + replay_buffer = TensorDictReplayBuffer( + batch_size=batch_size, + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, ) - buffer_hook.register(trainer) - weight_updater = UpdateWeights(collector, update_weights_interval=1) - weight_updater.register(trainer) - recorder = Recorder( - record_interval=1, # log every 100 optimization steps - record_frames=10_000, # maximum number of frames in the record - frame_skip=1, - policy_exploration=actor_explore, - environment=test_env, - exploration_mode="mode", - log_keys=[("next", "reward")], - out_keys={("next", "reward"): "rewards"}, - log_pbar=True, + return replay_buffer + +############################################################################### +# Data collector +# ~~~~~~~~~~~~~~ +# +# As in `PPO ` and +# `DDPG `, we will be using +# a data collector as a dataloader in the outer loop. +# +# We choose the following configuration: we will be running a series of +# parallel environments synchronously in parallel in different collectors, +# themselves running in parallel but asynchronously. +# The advantage of this configuration is that we can balance the amount of +# compute that is executed in batch with what we want to be executed +# asynchronously. We encourage the reader to experiment how the collection +# speed is impacted by modifying the number of collectors (ie the number of +# environment constructors passed to the collector) and the number of +# environment executed in parallel in each collector (controlled by the +# ``num_workers`` hyperparameter). +# +# When building the collector, we can choose on which device we want the +# environment and policy to execute the operations through the ``device`` +# keyword argument. The ``storing_devices`` argument will modify the +# location of the data being collected: if the batches that we are gathering +# have a considerable size, we may want to store them on a different location +# than the device where the computation is happening. For asynchronous data +# collectors such as ours, different storing devices mean that the data that +# we collect won't sit on the same device each time, which is something that +# out training loop must account for. For simplicity, we set the devices to +# the same value for all sub-collectors. + +def get_collector( + obs_norm_sd, + num_collectors, + actor_explore, + frames_per_batch, + total_frames, + device, +): + data_collector = MultiaSyncDataCollector( + [ + make_env(parallel=True, obs_norm_sd=obs_norm_sd), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, + postproc=MultiStep(gamma=gamma, n_steps=5), ) - recorder.register(trainer) - - ############################################################################### - # - Any callable (including :class:`torchrl.trainers.TrainerHookBase` - # subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. - # In this case, a location must be explicitely passed (). This method gives - # more control over the location of the hook but it also requires more - # understanding of the Trainer mechanism. 
- # Check the `trainer documentation `_ - # for a detailed description of the trainer hooks. - # - trainer.register_op("post_optim", target_net_updater.step) - - ############################################################################### - # We can log the training rewards too. Note that this is of limited interest - # with CartPole, as rewards are always 1. The discounted sum of rewards is miximised - # not by getting higher rewards but by keeping the cart-pole alive for longer. - # This will be reflected by the `total_rewards` value displayed in the progress bar. - # - log_reward = LogReward(log_pbar=True) - log_reward.register(trainer) - - ############################################################################### - # .. note:: - # It is possible to link multiple optimizers to the trainer if needed. - # In this case, each optimizer will be tied to a field in the loss dictionary. - # Check the :class:`torchrl.trainers.OptimizerHook` to learn more. - # - # Here we are, ready to train our algorithm! A simple call to - # ``trainer.train()`` and we'll be getting our results logged in. - # - trainer.train() - - ############################################################################### - # We can now quickly check the CSVs with the results. - - def print_csv_files_in_folder(folder_path): - """ - Find all CSV files in a folder and return the first 10 lines of each file as a string. - - Args: - folder_path (str): The relative path to the folder. - - Returns: - str: A string containing the first 10 lines of each CSV file in the folder. - """ - csv_files = [] - output_str = "" - for file in os.listdir(folder_path): - if file.endswith(".csv"): - csv_files.append(os.path.join(folder_path, file)) - for csv_file in csv_files: - output_str += f"File: {csv_file}\n" - with open(csv_file, "r") as f: - for i, line in enumerate(f): - if i == 10: - break - output_str += line.strip() + "\n" - output_str += "\n" - return output_str - - print_csv_files_in_folder(logger.experiment.log_dir) - - ############################################################################### - # Conclusion and possible improvements - # ------------------------------------ - # - # In this tutorial we have learned: - # - # - How to write a Trainer, including building its components and registering - # them in the trainer; - # - How to code a DQN algorithm, including how to create a policy that picks - # up the action with the highest value with - # :class:`torchrl.modules.QValueNetwork`; - # - How to build a multiprocessed data collector; - # - # Possible improvements to this tutorial could include: - # - # - Using the :class:`torchrl.data.MultiStep` - # post-processing. Multi-step will project an action - # to the :math:`n^{th}` following step, and create a discounted sum of the - # rewards in between. This trick can make the algorithm noticeably less - # myopic (although the reward is then biased). To use this, simply - # create the collector with - # - # >>> from torchrl.data.postprocs.postprocs import MultiStep - # >>> collector = CollectorClass(..., postproc=MultiStep(gamma, n)) - # - # where ``n`` is the number of looking-forward steps. Pay attention to the - # fact that the ``gamma`` factor has to be corrected by the number of - # steps till the next observation when being passed to - # ``vec_td_lambda_advantage_estimate``: - # - # >>> gamma = gamma ** tensordict["steps_to_next_obs"] - # - # - A prioritized replay buffer could also be used. 
This will give a - # higher priority to samples that have the worst value accuracy. - # Learn more on the `replay buffer section `_ - # of the documentation. - # - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` - # for more information). - # - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. + return data_collector + +############################################################################### +# Loss function +# ------------- +# +# Building our loss function is straightforward: we only need to provide +# the model and a bunch of hyperparameters to the DQNLoss class. +# +# Target parameters +# ~~~~~~~~~~~~~~~~~ +# +# Many off-policy RL algorithms use the concept of "target parameters" when it +# comes to estimate the value of the next state or state-action pair. +# The target parameters are lagged copies of the model parameters. Because +# their predictions mismatch those of the current model configuration, they +# help learning by putting a pessimistic bound on the value being estimated. +# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous +# in similar algorithms. +# + +def get_loss_module(actor, gamma): + loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) + target_updater = SoftUpdate(loss_module) + return loss_module, target_updater + +############################################################################### +# Hyperparameters +# --------------- +# +# Let's start with our hyperparameters. The following setting should work well +# in practice, and the performance of the algorithm should hopefully not be +# too sensitive to slight variations of these. + +device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" + +############################################################################### +# Optimizer +# ~~~~~~~~~ + +# the learning rate of the optimizer +lr = 2e-3 +# weight decay +wd = 1e-5 +# the beta parameters of Adam +betas = (0.9, 0.999) +# Optimization steps per batch collected (aka UPD or updates per data) +n_optim = 8 + +############################################################################### +# DQN parameters +# ~~~~~~~~~~~~~~ +# gamma decay factor +gamma = 0.99 + +############################################################################### +# Smooth target network update decay parameter. +# This loosely corresponds to a 1/tau interval with hard target network +# update +tau = 0.02 + +############################################################################### +# Data collection and replay buffer +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Values to be used for proper training have been commented. +# +# Total frames collected in the environment. In other implementations, the +# user defines a maximum number of episodes. +# This is harder to do with our data collectors since they return batches +# of N collected frames, where N is a constant. +# However, one can easily get the same restriction on number of episodes by +# breaking the training loop when a certain number +# episodes has been collected. +total_frames = 10_000 # 500000 + +############################################################################### +# Random frames used to initialize the replay buffer. +init_random_frames = 100 # 1000 + +############################################################################### +# Frames in each batch collected. 
+frames_per_batch = 32 # 128 + +############################################################################### +# Frames sampled from the replay buffer at each optimization step +batch_size = 32 # 256 + +############################################################################### +# Size of the replay buffer in terms of frames +buffer_size = min(total_frames, 100000) + +############################################################################### +# Number of environments run in parallel in each data collector +num_workers = 2 # 8 +num_collectors = 2 # 4 + +############################################################################### +# Environment and exploration +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# We set the initial and final value of the epsilon factor in Epsilon-greedy +# exploration. +# Since our policy is deterministic, exploration is crucial: without it, the +# only source of randomness would be the environment reset. + +eps_greedy_val = 0.1 +eps_greedy_val_env = 0.005 + +############################################################################### +# To speed up learning, we set the bias of the last layer of our value network +# to a predefined value (this is not mandatory) +init_bias = 2.0 + +############################################################################### +# .. note:: +# For fast rendering of the tutorial ``total_frames`` hyperparameter +# was set to a very low number. To get a reasonable performance, use a greater +# value e.g. 500000 +# + +############################################################################### +# Building a Trainer +# ------------------ +# +# TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the +# following keyword-only arguments: +# +# - ``collector`` +# - ``loss_module`` +# - ``optimizer`` +# - ``logger``: A logger can be +# - ``total_frames``: this parameter defines the lifespan of the trainer. +# - ``frame_skip``: when a frame-skip is used, the collector must be made +# aware of it in order to accurately count the number of frames +# collected etc. Making the trainer aware of this parameter is not +# mandatory but helps to have a fairer comparison between settings where +# the total number of frames (budget) is fixed but the frame-skip is +# variable. + +stats = get_norm_stats() +test_env = make_env(parallel=False, obs_norm_sd=stats) +# Get model +actor, actor_explore = make_model(test_env) +loss_module, target_net_updater = get_loss_module(actor, gamma) +target_net_updater.init_() + +collector = get_collector( + stats, num_collectors, actor_explore, frames_per_batch, total_frames, device +) +optimizer = torch.optim.Adam( + loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas +) +exp_name = f"dqn_exp_{uuid.uuid1()}" +logger = CSVLogger(exp_name=exp_name, log_dir="./") + +############################################################################### +# We can control how often the scalars should be logged. 
Here we set this +# to a low value as our training loop is short: + +log_interval = 500 + +trainer = Trainer( + collector=collector, + total_frames=total_frames, + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=logger, + optim_steps_per_batch=n_optim, + log_interval = log_interval, +) + +############################################################################### +# Registering hooks +# ~~~~~~~~~~~~~~~~~ +# +# Registering hooks can be achieved in two separate ways: +# +# - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` +# method is the first choice. One just needs to provide the trainer as input +# and the hook will be registered with a default name at a default location. +# For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` +# requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which +# can be cumbersome to implement. +buffer_hook = ReplayBufferTrainer( + get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), + flatten_tensordicts=True, +) +buffer_hook.register(trainer) +weight_updater = UpdateWeights(collector, update_weights_interval=1) +weight_updater.register(trainer) +recorder = Recorder( + record_interval=100, # log every 100 optimization steps + record_frames=1000, # maximum number of frames in the record + frame_skip=1, + policy_exploration=actor_explore, + environment=test_env, + exploration_mode="mode", + log_keys=[("next", "reward")], + out_keys={("next", "reward"): "rewards"}, + log_pbar=True, +) +recorder.register(trainer) + +############################################################################### +# - Any callable (including :class:`torchrl.trainers.TrainerHookBase` +# subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. +# In this case, a location must be explicitely passed (). This method gives +# more control over the location of the hook but it also requires more +# understanding of the Trainer mechanism. +# Check the `trainer documentation `_ +# for a detailed description of the trainer hooks. +# +trainer.register_op("post_optim", target_net_updater.step) + +############################################################################### +# We can log the training rewards too. Note that this is of limited interest +# with CartPole, as rewards are always 1. The discounted sum of rewards is miximised +# not by getting higher rewards but by keeping the cart-pole alive for longer. +# This will be reflected by the `total_rewards` value displayed in the progress bar. +# +log_reward = LogReward(log_pbar=True) +log_reward.register(trainer) + +############################################################################### +# .. note:: +# It is possible to link multiple optimizers to the trainer if needed. +# In this case, each optimizer will be tied to a field in the loss dictionary. +# Check the :class:`torchrl.trainers.OptimizerHook` to learn more. +# +# Here we are, ready to train our algorithm! A simple call to +# ``trainer.train()`` and we'll be getting our results logged in. +# +trainer.train() + +############################################################################### +# We can now quickly check the CSVs with the results. + +def print_csv_files_in_folder(folder_path): + """ + Find all CSV files in a folder and return the first 10 lines of each file as a string. + + Args: + folder_path (str): The relative path to the folder. 
+ + Returns: + str: A string containing the first 10 lines of each CSV file in the folder. + """ + csv_files = [] + output_str = "" + for file in os.listdir(folder_path): + if file.endswith(".csv"): + csv_files.append(os.path.join(folder_path, file)) + for csv_file in csv_files: + output_str += f"File: {csv_file}\n" + with open(csv_file, "r") as f: + for i, line in enumerate(f): + if i == 10: + break + output_str += line.strip() + "\n" + output_str += "\n" + return output_str + +print_csv_files_in_folder(logger.experiment.log_dir) + +############################################################################### +# Conclusion and possible improvements +# ------------------------------------ +# +# In this tutorial we have learned: +# +# - How to write a Trainer, including building its components and registering +# them in the trainer; +# - How to code a DQN algorithm, including how to create a policy that picks +# up the action with the highest value with +# :class:`torchrl.modules.QValueNetwork`; +# - How to build a multiprocessed data collector; +# +# Possible improvements to this tutorial could include: +# +# - Using the :class:`torchrl.data.MultiStep` +# post-processing. Multi-step will project an action +# to the :math:`n^{th}` following step, and create a discounted sum of the +# rewards in between. This trick can make the algorithm noticeably less +# myopic (although the reward is then biased). To use this, simply +# create the collector with +# +# >>> from torchrl.data.postprocs.postprocs import MultiStep +# >>> collector = CollectorClass(..., postproc=MultiStep(gamma, n)) +# +# where ``n`` is the number of looking-forward steps. Pay attention to the +# fact that the ``gamma`` factor has to be corrected by the number of +# steps till the next observation when being passed to +# ``vec_td_lambda_advantage_estimate``: +# +# >>> gamma = gamma ** tensordict["steps_to_next_obs"] +# +# - A prioritized replay buffer could also be used. This will give a +# higher priority to samples that have the worst value accuracy. +# Learn more on the `replay buffer section `_ +# of the documentation. +# - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` +# for more information). +# - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. 
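As a hedged sketch of the prioritized replay buffer improvement listed above (``get_prioritized_replay_buffer`` is a hypothetical helper name, and the ``alpha``/``beta`` values are illustrative, not tuned), the ``get_replay_buffer`` function could be swapped for something along these lines::

    from torchrl.data import LazyMemmapStorage, TensorDictPrioritizedReplayBuffer

    def get_prioritized_replay_buffer(buffer_size, n_optim, batch_size):
        # drop-in replacement for ``get_replay_buffer``: sampling is biased
        # towards transitions with a high priority; the trainer's
        # ``update_priority`` hook (registered by ReplayBufferTrainer) can then
        # refresh those priorities from the TD-error written by the loss.
        return TensorDictPrioritizedReplayBuffer(
            alpha=0.7,  # how strongly priorities bias the sampling
            beta=0.5,   # importance-sampling correction exponent
            storage=LazyMemmapStorage(buffer_size),
            batch_size=batch_size,
            prefetch=n_optim,
        )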
From 7180e6c91bc6767fa4dda7938e410c826147589f Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 09:21:27 +0100 Subject: [PATCH 75/89] amend --- torchrl/trainers/trainers.py | 2 +- tutorials/sphinx-tutorials/coding_dqn.py | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index d8bce805487..69cc22fd672 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -145,7 +145,7 @@ def __init__( progress_bar: bool = True, seed: int = None, save_trainer_interval: int = 10000, - log_interval: int=10000, + log_interval: int = 10000, save_trainer_file: Optional[Union[str, pathlib.Path]] = None, ) -> None: diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 3583aaf01e8..3a87e6cba88 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -118,6 +118,7 @@ UpdateWeights, ) + def is_notebook() -> bool: try: shell = get_ipython().__class__.__name__ @@ -130,6 +131,7 @@ def is_notebook() -> bool: except NameError: return False # Probably standard Python interpreter + ############################################################################### # Let's get started with the various pieces we need for our algorithm: # @@ -184,6 +186,7 @@ def is_notebook() -> bool: # the :class:`torchrl.envs.ObservationNorm` transform. # + def make_env( parallel=False, obs_norm_sd=None, @@ -224,6 +227,7 @@ def make_env( ) return env + ############################################################################### # Compute normalizing constants # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -237,6 +241,7 @@ def make_env( # not all dimensions disappear in the process: # + def get_norm_stats(): test_env = make_env() test_env.transform[-1].init_stats( @@ -248,6 +253,7 @@ def get_norm_stats(): print("state dict of the observation norm:", obs_norm_sd) return obs_norm_sd + ############################################################################### # Building the model (Deep Q-network) # ----------------------------------- @@ -271,6 +277,7 @@ def get_norm_stats(): # in the input :class:`tensordict.TensorDict`. # + def make_model(dummy_env): cnn_kwargs = { "num_cells": [32, 64, 64], @@ -295,9 +302,7 @@ def make_model(dummy_env): ).to(device) net.value[-1].bias.data.fill_(init_bias) - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to( - device - ) + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) # init actor: because the model is composed of lazy conv/linear layers, # we must pass a fake batch of data through it to instantiate them. tensordict = dummy_env.fake_tensordict() @@ -313,6 +318,7 @@ def make_model(dummy_env): return actor, actor_explore + ############################################################################### # Collecting and storing data # --------------------------- @@ -334,6 +340,7 @@ def make_model(dummy_env): # The only requirement of this storage is that the data passed to it at write # time must always have the same shape. 
+ def get_replay_buffer(buffer_size, n_optim, batch_size): replay_buffer = TensorDictReplayBuffer( batch_size=batch_size, @@ -342,6 +349,7 @@ def get_replay_buffer(buffer_size, n_optim, batch_size): ) return replay_buffer + ############################################################################### # Data collector # ~~~~~~~~~~~~~~ @@ -372,6 +380,7 @@ def get_replay_buffer(buffer_size, n_optim, batch_size): # out training loop must account for. For simplicity, we set the devices to # the same value for all sub-collectors. + def get_collector( obs_norm_sd, num_collectors, @@ -399,6 +408,7 @@ def get_collector( ) return data_collector + ############################################################################### # Loss function # ------------- @@ -418,11 +428,13 @@ def get_collector( # in similar algorithms. # + def get_loss_module(actor, gamma): loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) target_updater = SoftUpdate(loss_module) return loss_module, target_updater + ############################################################################### # Hyperparameters # --------------- @@ -566,7 +578,7 @@ def get_loss_module(actor, gamma): optimizer=optimizer, logger=logger, optim_steps_per_batch=n_optim, - log_interval = log_interval, + log_interval=log_interval, ) ############################################################################### @@ -635,6 +647,7 @@ def get_loss_module(actor, gamma): ############################################################################### # We can now quickly check the CSVs with the results. + def print_csv_files_in_folder(folder_path): """ Find all CSV files in a folder and return the first 10 lines of each file as a string. @@ -660,6 +673,7 @@ def print_csv_files_in_folder(folder_path): output_str += "\n" return output_str + print_csv_files_in_folder(logger.experiment.log_dir) ############################################################################### From 6223494262353d3c8920733aa98cf9f0c2de54a9 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 12:36:38 +0100 Subject: [PATCH 76/89] init --- test/test_collector.py | 34 ++++++++++++++++++++++++++++++++ torchrl/collectors/collectors.py | 23 ++++++++++++++++----- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/test/test_collector.py b/test/test_collector.py index 4dc92491fe7..dd78d68869b 100644 --- a/test/test_collector.py +++ b/test/test_collector.py @@ -1293,6 +1293,40 @@ def env_fn(seed): assert trajectory_ids[trajectory_ids_mask].numel() < frames_per_batch +def test_maxframes_error(): + env = TransformedEnv(CountingEnv(), StepCounter(2)) + _ = SyncDataCollector( + env, RandomPolicy(env.action_spec), total_frames=10_000, frames_per_batch=1000 + ) + with pytest.raises(ValueError): + _ = SyncDataCollector( + env, + RandomPolicy(env.action_spec), + total_frames=10_000, + frames_per_batch=1000, + max_frames_per_traj=2, + ) + + +def test_reset_heterogeneous_envs(): + env1 = lambda: TransformedEnv(CountingEnv(), StepCounter(2)) + env2 = lambda: TransformedEnv(CountingEnv(), StepCounter(3)) + env = SerialEnv(2, [env1, env2]) + c = SyncDataCollector( + env, RandomPolicy(env.action_spec), total_frames=10_000, frames_per_batch=1000 + ) + for data in c: # noqa: B007 + break + assert ( + data[0]["next", "truncated"].squeeze() + == torch.tensor([False, True]).repeat(250)[:500] + ).all() + assert ( + data[1]["next", "truncated"].squeeze() + == torch.tensor([False, False, True]).repeat(168)[:500] + ).all() + + if __name__ == "__main__": args, unknown = 
argparse.ArgumentParser().parse_known_args() pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index dd2505a78b3..98f39912c89 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -534,6 +534,18 @@ def __init__( self.env: EnvBase = self.env.to(self.device) self.max_frames_per_traj = max_frames_per_traj if self.max_frames_per_traj > 0: + # let's check that there is no StepCounter yet + for key in self.env.output_spec.keys(True, True): + if isinstance(key, str): + key = (key,) + if "truncated" in key: + raise ValueError( + "A 'truncated' key is already present in the environment " + "and the 'max_frames_per_traj' argument may conflict with " + "a 'StepCounter' that has already been set. " + "Possible solutions: Set max_frames_per_traj to 0 or " + "remove the StepCounter limit from the environment transforms." + ) env = self.env = TransformedEnv( self.env, StepCounter(max_steps=self.max_frames_per_traj) ) @@ -759,11 +771,12 @@ def _step_and_maybe_reset(self) -> None: _reset = None td_reset = None td_reset = self.env.reset(td_reset) - self._tensordict.update(td_reset, inplace=True) - done = self._tensordict.get("done") - if (_reset is None and done.any()) or ( - _reset is not None and done[_reset].any() - ): + reset_idx = done_or_terminated.squeeze(-1) + self._tensordict.get_sub_tensordict(reset_idx).update( + td_reset[reset_idx], inplace=True + ) + done = self._tensordict[reset_idx].get("done") + if (_reset is None and done.any()) or (_reset is not None and done.any()): raise RuntimeError( f"Env {self.env} was done after reset on specified '_reset' dimensions. This is (currently) not allowed." ) From b0d9629d3446c7277785d100fd65902deb374e75 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 13:19:42 +0100 Subject: [PATCH 77/89] empty commit From 822f518263646c0c608d4a57874631338d556c72 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 13:26:29 +0100 Subject: [PATCH 78/89] amend --- torchrl/collectors/collectors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index 98f39912c89..ca479cb2aa5 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -771,7 +771,9 @@ def _step_and_maybe_reset(self) -> None: _reset = None td_reset = None td_reset = self.env.reset(td_reset) - reset_idx = done_or_terminated.squeeze(-1) + reset_idx = done_or_terminated + while reset_idx.ndim > self._tensordict.ndim: + reset_idx = reset_idx.any(-1) self._tensordict.get_sub_tensordict(reset_idx).update( td_reset[reset_idx], inplace=True ) From 4ad5fb9c4b154bc93c82a9325a794d6820ae40e7 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 13:29:34 +0100 Subject: [PATCH 79/89] amend --- torchrl/collectors/collectors.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py index ca479cb2aa5..de84a5f4dc6 100644 --- a/torchrl/collectors/collectors.py +++ b/torchrl/collectors/collectors.py @@ -771,21 +771,18 @@ def _step_and_maybe_reset(self) -> None: _reset = None td_reset = None td_reset = self.env.reset(td_reset) - reset_idx = done_or_terminated - while reset_idx.ndim > self._tensordict.ndim: - reset_idx = reset_idx.any(-1) - self._tensordict.get_sub_tensordict(reset_idx).update( - td_reset[reset_idx], inplace=True + traj_done_or_terminated = 
done_or_terminated.sum( + tuple(range(self._tensordict.batch_dims, done_or_terminated.ndim)), + dtype=torch.bool, + ) + self._tensordict.get_sub_tensordict(traj_done_or_terminated).update( + td_reset[traj_done_or_terminated], inplace=True ) - done = self._tensordict[reset_idx].get("done") + done = self._tensordict[traj_done_or_terminated].get("done") if (_reset is None and done.any()) or (_reset is not None and done.any()): raise RuntimeError( f"Env {self.env} was done after reset on specified '_reset' dimensions. This is (currently) not allowed." ) - traj_done_or_terminated = done_or_terminated.sum( - tuple(range(self._tensordict.batch_dims, done_or_terminated.ndim)), - dtype=torch.bool, - ) traj_ids[traj_done_or_terminated] = traj_ids.max() + torch.arange( 1, traj_done_or_terminated.sum() + 1, device=traj_ids.device ) From ff54f0a2df063c1ac3e1cec9f6413292f1e7eed7 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 14:45:11 +0100 Subject: [PATCH 80/89] amend --- docs/source/reference/modules.rst | 2 +- torchrl/trainers/trainers.py | 11 +++++ tutorials/sphinx-tutorials/coding_dqn.py | 63 +++++++++++------------- 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/docs/source/reference/modules.rst b/docs/source/reference/modules.rst index 7a52329e02f..fb1eebf6b89 100644 --- a/docs/source/reference/modules.rst +++ b/docs/source/reference/modules.rst @@ -32,7 +32,7 @@ TensorDict modules Hooks ----- -.. currentmodule:: torchrl.modules.tensordict_module.actors +.. currentmodule:: torchrl.modules .. autosummary:: :toctree: generated/ diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 69cc22fd672..070679acd52 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -71,6 +71,17 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None: @abc.abstractmethod def register(self, trainer: Trainer, name: str): + """Registers the hook in the trainer at a default location. + + Args: + trainer (Trainer): the trainer where the hook must be registered. + name (str): the name of the hook. + + .. note:: + To register the hook at another location than the default, use + :meth:`torchrl.trainers.Trainer.register_op`. + + """ raise NotImplementedError diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 3a87e6cba88..47268647e71 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -83,6 +83,7 @@ # of this algorithm. 
# sphinx_gallery_start_ignore +import tempfile import warnings warnings.filterwarnings("ignore") @@ -159,19 +160,19 @@ def is_notebook() -> bool: # We will be using five transforms: # # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; -# - :class:`torchrl.envs.ToTensorImage` will convert a ``[W, H, C]`` uint8 +# - :class:`torchrl.envs.transforms.ToTensorImage` will convert a ``[W, H, C]`` uint8 # tensor in a floating point tensor in the ``[0, 1]`` space with shape # ``[C, W, H]``; -# - :class:`torchrl.envs.RewardScaling` to reduce the scale of the return; -# - :class:`torchrl.envs.GrayScale` will turn our image into grayscale; -# - :class:`torchrl.envs.Resize` will resize the image in a 64x64 format; -# - :class:`torchrl.envs.CatFrames` will concatenate an arbitrary number of +# - :class:`torchrl.envs.transforms.RewardScaling` to reduce the scale of the return; +# - :class:`torchrl.envs.transforms.GrayScale` will turn our image into grayscale; +# - :class:`torchrl.envs.transforms.Resize` will resize the image in a 64x64 format; +# - :class:`torchrl.envs.transforms.CatFrames` will concatenate an arbitrary number of # successive frames (``N=4``) in a single tensor along the channel dimension. # This is useful as a single image does not carry information about the # motion of the cartpole. Some memory about past observations and actions # is needed, either via a recurrent neural network or using a stack of # frames. -# - :class:`torchrl.envs.ObservationNorm` which will normalize our observations +# - :class:`torchrl.envs.transforms.ObservationNorm` which will normalize our observations # given some custom summary statistics. # # In practice, our environment builder has two arguments: @@ -265,9 +266,10 @@ def get_norm_stats(): # # .. math:: # -# val = b(obs) + v(obs) - \mathbb{E}[v(obs)] +# \mathbb{v} = b(obs) + v(obs) - \mathbb{E}[v(obs)] # -# where :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a +# where :math:`\mathbb{v}` is our vector of action values, +# :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a # :math:`\mathbb{R}^n \rightarrow \mathbb{R}^m` function, for # :math:`n = \# obs` and :math:`m = \# actions`. # @@ -354,8 +356,8 @@ def get_replay_buffer(buffer_size, n_optim, batch_size): # Data collector # ~~~~~~~~~~~~~~ # -# As in `PPO ` and -# `DDPG `, we will be using +# As in `PPO `_ and +# `DDPG `_, we will be using # a data collector as a dataloader in the outer loop. # # We choose the following configuration: we will be running a series of @@ -473,7 +475,9 @@ def get_loss_module(actor, gamma): ############################################################################### # Data collection and replay buffer # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Values to be used for proper training have been commented. +# +# .. note:: +# Values to be used for proper training have been commented. # # Total frames collected in the environment. In other implementations, the # user defines a maximum number of episodes. 
@@ -562,7 +566,9 @@ def get_loss_module(actor, gamma): loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas ) exp_name = f"dqn_exp_{uuid.uuid1()}" -logger = CSVLogger(exp_name=exp_name, log_dir="./") +tmpdir = tempfile.TemporaryDirectory() +logger = CSVLogger(exp_name=exp_name, log_dir=tmpdir.name) +warnings.warn(f"log dir: {logger.experiment.log_dir}") ############################################################################### # We can control how often the scalars should be logged. Here we set this @@ -616,7 +622,7 @@ def get_loss_module(actor, gamma): ############################################################################### # - Any callable (including :class:`torchrl.trainers.TrainerHookBase` # subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. -# In this case, a location must be explicitely passed (). This method gives +# In this case, a location must be explicitly passed (). This method gives # more control over the location of the hook but it also requires more # understanding of the Trainer mechanism. # Check the `trainer documentation `_ @@ -626,9 +632,11 @@ def get_loss_module(actor, gamma): ############################################################################### # We can log the training rewards too. Note that this is of limited interest -# with CartPole, as rewards are always 1. The discounted sum of rewards is miximised -# not by getting higher rewards but by keeping the cart-pole alive for longer. -# This will be reflected by the `total_rewards` value displayed in the progress bar. +# with CartPole, as rewards are always 1. The discounted sum of rewards is +# maximised not by getting higher rewards but by keeping the cart-pole alive +# for longer. +# This will be reflected by the `total_rewards` value displayed in the +# progress bar. # log_reward = LogReward(log_pbar=True) log_reward.register(trainer) @@ -636,7 +644,8 @@ def get_loss_module(actor, gamma): ############################################################################### # .. note:: # It is possible to link multiple optimizers to the trainer if needed. -# In this case, each optimizer will be tied to a field in the loss dictionary. +# In this case, each optimizer will be tied to a field in the loss +# dictionary. # Check the :class:`torchrl.trainers.OptimizerHook` to learn more. # # Here we are, ready to train our algorithm! A simple call to @@ -691,26 +700,10 @@ def print_csv_files_in_folder(folder_path): # # Possible improvements to this tutorial could include: # -# - Using the :class:`torchrl.data.MultiStep` -# post-processing. Multi-step will project an action -# to the :math:`n^{th}` following step, and create a discounted sum of the -# rewards in between. This trick can make the algorithm noticeably less -# myopic (although the reward is then biased). To use this, simply -# create the collector with -# -# >>> from torchrl.data.postprocs.postprocs import MultiStep -# >>> collector = CollectorClass(..., postproc=MultiStep(gamma, n)) -# -# where ``n`` is the number of looking-forward steps. Pay attention to the -# fact that the ``gamma`` factor has to be corrected by the number of -# steps till the next observation when being passed to -# ``vec_td_lambda_advantage_estimate``: -# -# >>> gamma = gamma ** tensordict["steps_to_next_obs"] -# # - A prioritized replay buffer could also be used. This will give a # higher priority to samples that have the worst value accuracy. 
-# Learn more on the `replay buffer section `_ +# Learn more on the +# `replay buffer section `_ # of the documentation. # - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` # for more information). From d978824d85465ebfac3baab81028f09eb3b41315 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 15:33:24 +0100 Subject: [PATCH 81/89] amend --- torchrl/record/loggers/csv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchrl/record/loggers/csv.py b/torchrl/record/loggers/csv.py index 69120bf1110..90aa41a742e 100644 --- a/torchrl/record/loggers/csv.py +++ b/torchrl/record/loggers/csv.py @@ -74,6 +74,7 @@ def __init__(self, exp_name: str, log_dir: Optional[str] = None) -> None: super().__init__(exp_name=exp_name, log_dir=log_dir) self._has_imported_moviepy = False + print(f"self.log_dir: {self.experiment.log_dir}") def _create_experiment(self) -> "CSVExperiment": """Creates a CSV experiment.""" From cf1ba97f74b768c76e5eb6cc7f730a929c088214 Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 16:43:57 +0100 Subject: [PATCH 82/89] amend --- docs/source/reference/trainers.rst | 2 +- torchrl/modules/__init__.py | 2 ++ torchrl/modules/tensordict_module/__init__.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/source/reference/trainers.rst b/docs/source/reference/trainers.rst index a0c0056f2f7..d14cfae12ee 100644 --- a/docs/source/reference/trainers.rst +++ b/docs/source/reference/trainers.rst @@ -73,7 +73,7 @@ Hooks can be split into 3 categories: **data processing** (:obj:`"batch_process" - **Data processing** hooks update a tensordict of data. Hooks :obj:`__call__` method should accept a :obj:`TensorDict` object as input and update it given some strategy. Examples of such hooks include Replay Buffer extension (:obj:`ReplayBufferTrainer.extend`), data normalization (including normalization - constants update), data subsampling (:doc:`BatchSubSampler`) and such. + constants update), data subsampling (:class:`torchrl.trainers.BatchSubSampler`) and such. - **Logging** hooks take a batch of data presented as a :obj:`TensorDict` and write in the logger some information retrieved from that data. 
Examples include the :obj:`Recorder` hook, the reward diff --git a/torchrl/modules/__init__.py b/torchrl/modules/__init__.py index 5a3f4fdbb2b..7c26b7b1b8f 100644 --- a/torchrl/modules/__init__.py +++ b/torchrl/modules/__init__.py @@ -41,10 +41,12 @@ ActorValueOperator, AdditiveGaussianWrapper, DistributionalQValueActor, + DistributionalQValueHook, EGreedyWrapper, OrnsteinUhlenbeckProcessWrapper, ProbabilisticActor, QValueActor, + QValueHook, SafeModule, SafeProbabilisticModule, SafeProbabilisticTensorDictSequential, diff --git a/torchrl/modules/tensordict_module/__init__.py b/torchrl/modules/tensordict_module/__init__.py index 6686eb6b602..d74634c153a 100644 --- a/torchrl/modules/tensordict_module/__init__.py +++ b/torchrl/modules/tensordict_module/__init__.py @@ -9,8 +9,10 @@ ActorCriticWrapper, ActorValueOperator, DistributionalQValueActor, + DistributionalQValueHook, ProbabilisticActor, QValueActor, + QValueHook, ValueOperator, ) from .common import SafeModule From 0e4e6b4bfbaf69a530e54179090421c44196aa0a Mon Sep 17 00:00:00 2001 From: vmoens Date: Tue, 4 Apr 2023 18:30:51 +0100 Subject: [PATCH 83/89] theme --- docs/source/_static/js/theme.js | 3824 +------------------------------ 1 file changed, 2 insertions(+), 3822 deletions(-) diff --git a/docs/source/_static/js/theme.js b/docs/source/_static/js/theme.js index 219443ee11e..297154d9ed7 100644 --- a/docs/source/_static/js/theme.js +++ b/docs/source/_static/js/theme.js @@ -692,7 +692,7 @@ window.sideMenus = { } }; -},{}],11:[function(require,module,exports){ +},{}],"pytorch-sphinx-theme":[function(require,module,exports){ var jQuery = (typeof(window) != 'undefined') ? window.jQuery : require('jquery'); // Sphinx theme nav state @@ -1125,3824 +1125,4 @@ $(window).scroll(function () { }); -},{"jquery":"jquery"}],"pytorch-sphinx-theme":[function(require,module,exports){ -require=(function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i wait) { - if (timeout) { - clearTimeout(timeout); - timeout = null; - } - previous = now; - result = func.apply(context, args); - if (!timeout) context = args = null; - } else if (!timeout && options.trailing !== false) { - timeout = setTimeout(later, remaining); - } - return result; - }; - }, - - closest: function (el, selector) { - var matchesFn; - - // find vendor prefix - ['matches','webkitMatchesSelector','mozMatchesSelector','msMatchesSelector','oMatchesSelector'].some(function(fn) { - if (typeof document.body[fn] == 'function') { - matchesFn = fn; - return true; - } - return false; - }); - - var parent; - - // traverse parents - while (el) { - parent = el.parentElement; - if (parent && parent[matchesFn](selector)) { - return parent; - } - el = parent; - } - - return null; - }, - - // Modified from https://stackoverflow.com/a/18953277 - offset: function(elem) { - if (!elem) { - return; - } - - rect = elem.getBoundingClientRect(); - - // Make sure element is not hidden (display: none) or disconnected - if (rect.width || rect.height || elem.getClientRects().length) { - var doc = elem.ownerDocument; - var docElem = doc.documentElement; - - return { - top: rect.top + window.pageYOffset - docElem.clientTop, - left: rect.left + window.pageXOffset - 
docElem.clientLeft - }; - } - }, - - headersHeight: function() { - if (document.getElementById("pytorch-left-menu").classList.contains("make-fixed")) { - return document.getElementById("pytorch-page-level-bar").offsetHeight; - } else { - return document.getElementById("header-holder").offsetHeight + - document.getElementById("pytorch-page-level-bar").offsetHeight; - } - }, - - windowHeight: function() { - return window.innerHeight || - document.documentElement.clientHeight || - document.body.clientHeight; - } -} - -},{}],2:[function(require,module,exports){ -var cookieBanner = { - init: function() { - cookieBanner.bind(); - - var cookieExists = cookieBanner.cookieExists(); - - if (!cookieExists) { - cookieBanner.setCookie(); - cookieBanner.showCookieNotice(); - } - }, - - bind: function() { - $(".close-button").on("click", cookieBanner.hideCookieNotice); - }, - - cookieExists: function() { - var cookie = localStorage.getItem("returningPytorchUser"); - - if (cookie) { - return true; - } else { - return false; - } - }, - - setCookie: function() { - localStorage.setItem("returningPytorchUser", true); - }, - - showCookieNotice: function() { - $(".cookie-banner-wrapper").addClass("is-visible"); - }, - - hideCookieNotice: function() { - $(".cookie-banner-wrapper").removeClass("is-visible"); - } -}; - -$(function() { - cookieBanner.init(); -}); - -},{}],3:[function(require,module,exports){ -window.filterTags = { - bind: function() { - var options = { - valueNames: [{ data: ["tags"] }], - page: "6", - pagination: true - }; - - var tutorialList = new List("tutorial-cards", options); - - function filterSelectedTags(cardTags, selectedTags) { - return cardTags.some(function(tag) { - return selectedTags.some(function(selectedTag) { - return selectedTag == tag; - }); - }); - } - - function updateList() { - var selectedTags = []; - - $(".selected").each(function() { - selectedTags.push($(this).data("tag")); - }); - - tutorialList.filter(function(item) { - var cardTags; - - if (item.values().tags == null) { - cardTags = [""]; - } else { - cardTags = item.values().tags.split(","); - } - - if (selectedTags.length == 0) { - return true; - } else { - return filterSelectedTags(cardTags, selectedTags); - } - }); - } - - $(".filter-btn").on("click", function() { - if ($(this).data("tag") == "all") { - $(this).addClass("all-tag-selected"); - $(".filter").removeClass("selected"); - } else { - $(this).toggleClass("selected"); - $("[data-tag='all']").removeClass("all-tag-selected"); - } - - // If no tags are selected then highlight the 'All' tag - - if (!$(".selected")[0]) { - $("[data-tag='all']").addClass("all-tag-selected"); - } - - updateList(); - }); - } -}; - -},{}],4:[function(require,module,exports){ -// Modified from https://stackoverflow.com/a/32396543 -window.highlightNavigation = { - navigationListItems: document.querySelectorAll("#pytorch-right-menu li"), - sections: document.querySelectorAll(".pytorch-article .section"), - sectionIdTonavigationLink: {}, - - bind: function() { - if (!sideMenus.displayRightMenu) { - return; - }; - - for (var i = 0; i < highlightNavigation.sections.length; i++) { - var id = highlightNavigation.sections[i].id; - highlightNavigation.sectionIdTonavigationLink[id] = - document.querySelectorAll('#pytorch-right-menu li a[href="#' + id + '"]')[0]; - } - - $(window).scroll(utilities.throttle(highlightNavigation.highlight, 100)); - }, - - highlight: function() { - var rightMenu = document.getElementById("pytorch-right-menu"); - - // If right menu is not on the screen don't bother 
- if (rightMenu.offsetWidth === 0 && rightMenu.offsetHeight === 0) { - return; - } - - var scrollPosition = utilities.scrollTop(); - var OFFSET_TOP_PADDING = 25; - var offset = document.getElementById("header-holder").offsetHeight + - document.getElementById("pytorch-page-level-bar").offsetHeight + - OFFSET_TOP_PADDING; - - var sections = highlightNavigation.sections; - - for (var i = (sections.length - 1); i >= 0; i--) { - var currentSection = sections[i]; - var sectionTop = utilities.offset(currentSection).top; - - if (scrollPosition >= sectionTop - offset) { - var navigationLink = highlightNavigation.sectionIdTonavigationLink[currentSection.id]; - var navigationListItem = utilities.closest(navigationLink, "li"); - - if (navigationListItem && !navigationListItem.classList.contains("active")) { - for (var i = 0; i < highlightNavigation.navigationListItems.length; i++) { - var el = highlightNavigation.navigationListItems[i]; - if (el.classList.contains("active")) { - el.classList.remove("active"); - } - } - - navigationListItem.classList.add("active"); - - // Scroll to active item. Not a requested feature but we could revive it. Needs work. - - // var menuTop = $("#pytorch-right-menu").position().top; - // var itemTop = navigationListItem.getBoundingClientRect().top; - // var TOP_PADDING = 20 - // var newActiveTop = $("#pytorch-side-scroll-right").scrollTop() + itemTop - menuTop - TOP_PADDING; - - // $("#pytorch-side-scroll-right").animate({ - // scrollTop: newActiveTop - // }, 100); - } - - break; - } - } - } -}; - -},{}],5:[function(require,module,exports){ -window.mainMenuDropdown = { - bind: function() { - $("[data-toggle='ecosystem-dropdown']").on("click", function() { - toggleDropdown($(this).attr("data-toggle")); - }); - - $("[data-toggle='resources-dropdown']").on("click", function() { - toggleDropdown($(this).attr("data-toggle")); - }); - - function toggleDropdown(menuToggle) { - var showMenuClass = "show-menu"; - var menuClass = "." 
+ menuToggle + "-menu"; - - if ($(menuClass).hasClass(showMenuClass)) { - $(menuClass).removeClass(showMenuClass); - } else { - $("[data-toggle=" + menuToggle + "].show-menu").removeClass( - showMenuClass - ); - $(menuClass).addClass(showMenuClass); - } - } - } -}; - -},{}],6:[function(require,module,exports){ -window.mobileMenu = { - bind: function() { - $("[data-behavior='open-mobile-menu']").on('click', function(e) { - e.preventDefault(); - $(".mobile-main-menu").addClass("open"); - $("body").addClass('no-scroll'); - - mobileMenu.listenForResize(); - }); - - $("[data-behavior='close-mobile-menu']").on('click', function(e) { - e.preventDefault(); - mobileMenu.close(); - }); - }, - - listenForResize: function() { - $(window).on('resize.ForMobileMenu', function() { - if ($(this).width() > 768) { - mobileMenu.close(); - } - }); - }, - - close: function() { - $(".mobile-main-menu").removeClass("open"); - $("body").removeClass('no-scroll'); - $(window).off('resize.ForMobileMenu'); - } -}; - -},{}],7:[function(require,module,exports){ -window.mobileTOC = { - bind: function() { - $("[data-behavior='toggle-table-of-contents']").on("click", function(e) { - e.preventDefault(); - - var $parent = $(this).parent(); - - if ($parent.hasClass("is-open")) { - $parent.removeClass("is-open"); - $(".pytorch-left-menu").slideUp(200, function() { - $(this).css({display: ""}); - }); - } else { - $parent.addClass("is-open"); - $(".pytorch-left-menu").slideDown(200); - } - }); - } -} - -},{}],8:[function(require,module,exports){ -window.pytorchAnchors = { - bind: function() { - // Replace Sphinx-generated anchors with anchorjs ones - $(".headerlink").text(""); - - window.anchors.add(".pytorch-article .headerlink"); - - $(".anchorjs-link").each(function() { - var $headerLink = $(this).closest(".headerlink"); - var href = $headerLink.attr("href"); - var clone = this.outerHTML; - - $clone = $(clone).attr("href", href); - $headerLink.before($clone); - $headerLink.remove(); - }); - } -}; - -},{}],9:[function(require,module,exports){ -// Modified from https://stackoverflow.com/a/13067009 -// Going for a JS solution to scrolling to an anchor so we can benefit from -// less hacky css and smooth scrolling. - -window.scrollToAnchor = { - bind: function() { - var document = window.document; - var history = window.history; - var location = window.location - var HISTORY_SUPPORT = !!(history && history.pushState); - - var anchorScrolls = { - ANCHOR_REGEX: /^#[^ ]+$/, - offsetHeightPx: function() { - var OFFSET_HEIGHT_PADDING = 20; - // TODO: this is a little janky. We should try to not rely on JS for this - return utilities.headersHeight() + OFFSET_HEIGHT_PADDING; - }, - - /** - * Establish events, and fix initial scroll position if a hash is provided. - */ - init: function() { - this.scrollToCurrent(); - // This interferes with clicks below it, causing a double fire - // $(window).on('hashchange', $.proxy(this, 'scrollToCurrent')); - $('body').on('click', 'a', $.proxy(this, 'delegateAnchors')); - $('body').on('click', '#pytorch-right-menu li span', $.proxy(this, 'delegateSpans')); - }, - - /** - * Return the offset amount to deduct from the normal scroll position. - * Modify as appropriate to allow for dynamic calculations - */ - getFixedOffset: function() { - return this.offsetHeightPx(); - }, - - /** - * If the provided href is an anchor which resolves to an element on the - * page, scroll to it. - * @param {String} href - * @return {Boolean} - Was the href an anchor. 
- */ - scrollIfAnchor: function(href, pushToHistory) { - var match, anchorOffset; - - if(!this.ANCHOR_REGEX.test(href)) { - return false; - } - - match = document.getElementById(href.slice(1)); - - if(match) { - var anchorOffset = $(match).offset().top - this.getFixedOffset(); - - $('html, body').scrollTop(anchorOffset); - - // Add the state to history as-per normal anchor links - if(HISTORY_SUPPORT && pushToHistory) { - history.pushState({}, document.title, location.pathname + href); - } - } - - return !!match; - }, - - /** - * Attempt to scroll to the current location's hash. - */ - scrollToCurrent: function(e) { - if(this.scrollIfAnchor(window.location.hash) && e) { - e.preventDefault(); - } - }, - - delegateSpans: function(e) { - var elem = utilities.closest(e.target, "a"); - - if(this.scrollIfAnchor(elem.getAttribute('href'), true)) { - e.preventDefault(); - } - }, - - /** - * If the click event's target was an anchor, fix the scroll position. - */ - delegateAnchors: function(e) { - var elem = e.target; - - if(this.scrollIfAnchor(elem.getAttribute('href'), true)) { - e.preventDefault(); - } - } - }; - - $(document).ready($.proxy(anchorScrolls, 'init')); - } -}; - -},{}],10:[function(require,module,exports){ -window.sideMenus = { - rightMenuIsOnScreen: function() { - return document.getElementById("pytorch-content-right").offsetParent !== null; - }, - - isFixedToBottom: false, - - bind: function() { - sideMenus.handleLeftMenu(); - - var rightMenuLinks = document.querySelectorAll("#pytorch-right-menu li"); - var rightMenuHasLinks = rightMenuLinks.length > 1; - - if (!rightMenuHasLinks) { - for (var i = 0; i < rightMenuLinks.length; i++) { - rightMenuLinks[i].style.display = "none"; - } - } - - if (rightMenuHasLinks) { - // Don't show the Shortcuts menu title text unless there are menu items - document.getElementById("pytorch-shortcuts-wrapper").style.display = "block"; - - // We are hiding the titles of the pages in the right side menu but there are a few - // pages that include other pages in the right side menu (see 'torch.nn' in the docs) - // so if we exclude those it looks confusing. 
Here we add a 'title-link' class to these - // links so we can exclude them from normal right side menu link operations - var titleLinks = document.querySelectorAll( - "#pytorch-right-menu #pytorch-side-scroll-right \ - > ul > li > a.reference.internal" - ); - - for (var i = 0; i < titleLinks.length; i++) { - var link = titleLinks[i]; - - link.classList.add("title-link"); - - if ( - link.nextElementSibling && - link.nextElementSibling.tagName === "UL" && - link.nextElementSibling.children.length > 0 - ) { - link.classList.add("has-children"); - } - } - - // Add + expansion signifiers to normal right menu links that have sub menus - var menuLinks = document.querySelectorAll( - "#pytorch-right-menu ul li ul li a.reference.internal" - ); - - for (var i = 0; i < menuLinks.length; i++) { - if ( - menuLinks[i].nextElementSibling && - menuLinks[i].nextElementSibling.tagName === "UL" - ) { - menuLinks[i].classList.add("not-expanded"); - } - } - - // If a hash is present on page load recursively expand menu items leading to selected item - var linkWithHash = - document.querySelector( - "#pytorch-right-menu a[href=\"" + window.location.hash + "\"]" - ); - - if (linkWithHash) { - // Expand immediate sibling list if present - if ( - linkWithHash.nextElementSibling && - linkWithHash.nextElementSibling.tagName === "UL" && - linkWithHash.nextElementSibling.children.length > 0 - ) { - linkWithHash.nextElementSibling.style.display = "block"; - linkWithHash.classList.add("expanded"); - } - - // Expand ancestor lists if any - sideMenus.expandClosestUnexpandedParentList(linkWithHash); - } - - // Bind click events on right menu links - $("#pytorch-right-menu a.reference.internal").on("click", function() { - if (this.classList.contains("expanded")) { - this.nextElementSibling.style.display = "none"; - this.classList.remove("expanded"); - this.classList.add("not-expanded"); - } else if (this.classList.contains("not-expanded")) { - this.nextElementSibling.style.display = "block"; - this.classList.remove("not-expanded"); - this.classList.add("expanded"); - } - }); - - sideMenus.handleRightMenu(); - } - - $(window).on('resize scroll', function(e) { - sideMenus.handleNavBar(); - - sideMenus.handleLeftMenu(); - - if (sideMenus.rightMenuIsOnScreen()) { - sideMenus.handleRightMenu(); - } - }); - }, - - leftMenuIsFixed: function() { - return document.getElementById("pytorch-left-menu").classList.contains("make-fixed"); - }, - - handleNavBar: function() { - var mainHeaderHeight = document.getElementById('header-holder').offsetHeight; - - // If we are scrolled past the main navigation header fix the sub menu bar to top of page - if (utilities.scrollTop() >= mainHeaderHeight) { - document.getElementById("pytorch-left-menu").classList.add("make-fixed"); - document.getElementById("pytorch-page-level-bar").classList.add("left-menu-is-fixed"); - } else { - document.getElementById("pytorch-left-menu").classList.remove("make-fixed"); - document.getElementById("pytorch-page-level-bar").classList.remove("left-menu-is-fixed"); - } - }, - - expandClosestUnexpandedParentList: function (el) { - var closestParentList = utilities.closest(el, "ul"); - - if (closestParentList) { - var closestParentLink = closestParentList.previousElementSibling; - var closestParentLinkExists = closestParentLink && - closestParentLink.tagName === "A" && - closestParentLink.classList.contains("reference"); - - if (closestParentLinkExists) { - // Don't add expansion class to any title links - if (closestParentLink.classList.contains("title-link")) { - 
return; - } - - closestParentList.style.display = "block"; - closestParentLink.classList.remove("not-expanded"); - closestParentLink.classList.add("expanded"); - sideMenus.expandClosestUnexpandedParentList(closestParentLink); - } - } - }, - - handleLeftMenu: function () { - var windowHeight = utilities.windowHeight(); - var topOfFooterRelativeToWindow = document.getElementById("docs-tutorials-resources").getBoundingClientRect().top; - - if (topOfFooterRelativeToWindow >= windowHeight) { - document.getElementById("pytorch-left-menu").style.height = "100%"; - } else { - var howManyPixelsOfTheFooterAreInTheWindow = windowHeight - topOfFooterRelativeToWindow; - var leftMenuDifference = howManyPixelsOfTheFooterAreInTheWindow; - document.getElementById("pytorch-left-menu").style.height = (windowHeight - leftMenuDifference) + "px"; - } - }, - - handleRightMenu: function() { - var rightMenuWrapper = document.getElementById("pytorch-content-right"); - var rightMenu = document.getElementById("pytorch-right-menu"); - var rightMenuList = rightMenu.getElementsByTagName("ul")[0]; - var article = document.getElementById("pytorch-article"); - var articleHeight = article.offsetHeight; - var articleBottom = utilities.offset(article).top + articleHeight; - var mainHeaderHeight = document.getElementById('header-holder').offsetHeight; - - if (utilities.scrollTop() < mainHeaderHeight) { - rightMenuWrapper.style.height = "100%"; - rightMenu.style.top = 0; - rightMenu.classList.remove("scrolling-fixed"); - rightMenu.classList.remove("scrolling-absolute"); - } else { - if (rightMenu.classList.contains("scrolling-fixed")) { - var rightMenuBottom = - utilities.offset(rightMenuList).top + rightMenuList.offsetHeight; - - if (rightMenuBottom >= articleBottom) { - rightMenuWrapper.style.height = articleHeight + mainHeaderHeight + "px"; - rightMenu.style.top = utilities.scrollTop() - mainHeaderHeight + "px"; - rightMenu.classList.add("scrolling-absolute"); - rightMenu.classList.remove("scrolling-fixed"); - } - } else { - rightMenuWrapper.style.height = articleHeight + mainHeaderHeight + "px"; - rightMenu.style.top = - articleBottom - mainHeaderHeight - rightMenuList.offsetHeight + "px"; - rightMenu.classList.add("scrolling-absolute"); - } - - if (utilities.scrollTop() < articleBottom - rightMenuList.offsetHeight) { - rightMenuWrapper.style.height = "100%"; - rightMenu.style.top = ""; - rightMenu.classList.remove("scrolling-absolute"); - rightMenu.classList.add("scrolling-fixed"); - } - } - - var rightMenuSideScroll = document.getElementById("pytorch-side-scroll-right"); - var sideScrollFromWindowTop = rightMenuSideScroll.getBoundingClientRect().top; - - rightMenuSideScroll.style.height = utilities.windowHeight() - sideScrollFromWindowTop + "px"; - } -}; - -},{}],11:[function(require,module,exports){ -var jQuery = (typeof(window) != 'undefined') ? window.jQuery : require('jquery'); - -// Sphinx theme nav state -function ThemeNav () { - - var nav = { - navBar: null, - win: null, - winScroll: false, - winResize: false, - linkScroll: false, - winPosition: 0, - winHeight: null, - docHeight: null, - isRunning: false - }; - - nav.enable = function (withStickyNav) { - var self = this; - - // TODO this can likely be removed once the theme javascript is broken - // out from the RTD assets. This just ensures old projects that are - // calling `enable()` get the sticky menu on by default. All other cals - // to `enable` should include an argument for enabling the sticky menu. 
- if (typeof(withStickyNav) == 'undefined') { - withStickyNav = true; - } - - if (self.isRunning) { - // Only allow enabling nav logic once - return; - } - - self.isRunning = true; - jQuery(function ($) { - self.init($); - - self.reset(); - self.win.on('hashchange', self.reset); - - if (withStickyNav) { - // Set scroll monitor - self.win.on('scroll', function () { - if (!self.linkScroll) { - if (!self.winScroll) { - self.winScroll = true; - requestAnimationFrame(function() { self.onScroll(); }); - } - } - }); - } - - // Set resize monitor - self.win.on('resize', function () { - if (!self.winResize) { - self.winResize = true; - requestAnimationFrame(function() { self.onResize(); }); - } - }); - - self.onResize(); - }); - - }; - - // TODO remove this with a split in theme and Read the Docs JS logic as - // well, it's only here to support 0.3.0 installs of our theme. - nav.enableSticky = function() { - this.enable(true); - }; - - nav.init = function ($) { - var doc = $(document), - self = this; - - this.navBar = $('div.pytorch-side-scroll:first'); - this.win = $(window); - - // Set up javascript UX bits - $(document) - // Shift nav in mobile when clicking the menu. - .on('click', "[data-toggle='pytorch-left-menu-nav-top']", function() { - $("[data-toggle='wy-nav-shift']").toggleClass("shift"); - $("[data-toggle='rst-versions']").toggleClass("shift"); - }) - - // Nav menu link click operations - .on('click', ".pytorch-menu-vertical .current ul li a", function() { - var target = $(this); - // Close menu when you click a link. - $("[data-toggle='wy-nav-shift']").removeClass("shift"); - $("[data-toggle='rst-versions']").toggleClass("shift"); - // Handle dynamic display of l3 and l4 nav lists - self.toggleCurrent(target); - self.hashChange(); - }) - .on('click', "[data-toggle='rst-current-version']", function() { - $("[data-toggle='rst-versions']").toggleClass("shift-up"); - }) - - // Make tables responsive - $("table.docutils:not(.field-list,.footnote,.citation)") - .wrap("

"); - - // Add extra class to responsive tables that contain - // footnotes or citations so that we can target them for styling - $("table.docutils.footnote") - .wrap("
"); - $("table.docutils.citation") - .wrap("
"); - - // Add expand links to all parents of nested ul - $('.pytorch-menu-vertical ul').not('.simple').siblings('a').each(function () { - var link = $(this); - expand = $(''); - expand.on('click', function (ev) { - self.toggleCurrent(link); - ev.stopPropagation(); - return false; - }); - link.prepend(expand); - }); - }; - - nav.reset = function () { - // Get anchor from URL and open up nested nav - var anchor = encodeURI(window.location.hash) || '#'; - - try { - var vmenu = $('.pytorch-menu-vertical'); - var link = vmenu.find('[href="' + anchor + '"]'); - if (link.length === 0) { - // this link was not found in the sidebar. - // Find associated id element, then its closest section - // in the document and try with that one. - var id_elt = $('.document [id="' + anchor.substring(1) + '"]'); - var closest_section = id_elt.closest('div.section'); - link = vmenu.find('[href="#' + closest_section.attr("id") + '"]'); - if (link.length === 0) { - // still not found in the sidebar. fall back to main section - link = vmenu.find('[href="#"]'); - } - } - // If we found a matching link then reset current and re-apply - // otherwise retain the existing match - if (link.length > 0) { - $('.pytorch-menu-vertical .current').removeClass('current'); - link.addClass('current'); - link.closest('li.toctree-l1').addClass('current'); - link.closest('li.toctree-l1').parent().addClass('current'); - link.closest('li.toctree-l1').addClass('current'); - link.closest('li.toctree-l2').addClass('current'); - link.closest('li.toctree-l3').addClass('current'); - link.closest('li.toctree-l4').addClass('current'); - } - } - catch (err) { - console.log("Error expanding nav for anchor", err); - } - - }; - - nav.onScroll = function () { - this.winScroll = false; - var newWinPosition = this.win.scrollTop(), - winBottom = newWinPosition + this.winHeight, - navPosition = this.navBar.scrollTop(), - newNavPosition = navPosition + (newWinPosition - this.winPosition); - if (newWinPosition < 0 || winBottom > this.docHeight) { - return; - } - this.navBar.scrollTop(newNavPosition); - this.winPosition = newWinPosition; - }; - - nav.onResize = function () { - this.winResize = false; - this.winHeight = this.win.height(); - this.docHeight = $(document).height(); - }; - - nav.hashChange = function () { - this.linkScroll = true; - this.win.one('hashchange', function () { - this.linkScroll = false; - }); - }; - - nav.toggleCurrent = function (elem) { - var parent_li = elem.closest('li'); - parent_li.siblings('li.current').removeClass('current'); - parent_li.siblings().find('li.current').removeClass('current'); - parent_li.find('> ul li.current').removeClass('current'); - parent_li.toggleClass('current'); - } - - return nav; -}; - -module.exports.ThemeNav = ThemeNav(); - -if (typeof(window) != 'undefined') { - window.SphinxRtdTheme = { - Navigation: module.exports.ThemeNav, - // TODO remove this once static assets are split up between the theme - // and Read the Docs. For now, this patches 0.3.0 to be backwards - // compatible with a pre-0.3.0 layout.html - StickyNav: module.exports.ThemeNav, - }; -} - - -// requestAnimationFrame polyfill by Erik Möller. 
fixes from Paul Irish and Tino Zijdel -// https://gist.github.com/paulirish/1579671 -// MIT license - -(function() { - var lastTime = 0; - var vendors = ['ms', 'moz', 'webkit', 'o']; - for(var x = 0; x < vendors.length && !window.requestAnimationFrame; ++x) { - window.requestAnimationFrame = window[vendors[x]+'RequestAnimationFrame']; - window.cancelAnimationFrame = window[vendors[x]+'CancelAnimationFrame'] - || window[vendors[x]+'CancelRequestAnimationFrame']; - } - - if (!window.requestAnimationFrame) - window.requestAnimationFrame = function(callback, element) { - var currTime = new Date().getTime(); - var timeToCall = Math.max(0, 16 - (currTime - lastTime)); - var id = window.setTimeout(function() { callback(currTime + timeToCall); }, - timeToCall); - lastTime = currTime + timeToCall; - return id; - }; - - if (!window.cancelAnimationFrame) - window.cancelAnimationFrame = function(id) { - clearTimeout(id); - }; -}()); - -$(".sphx-glr-thumbcontainer").removeAttr("tooltip"); -$("table").removeAttr("border"); - -// This code replaces the default sphinx gallery download buttons -// with the 3 download buttons at the top of the page - -var downloadNote = $(".sphx-glr-download-link-note.admonition.note"); -if (downloadNote.length >= 1) { - var tutorialUrlArray = $("#tutorial-type").text().split('/'); - tutorialUrlArray[0] = tutorialUrlArray[0] + "/sphinx-tutorials" - - var githubLink = "https://github.com/pytorch/rl/blob/main/" + tutorialUrlArray.join("/") + ".py", - notebookLink = $(".reference.download")[1].href, - notebookDownloadPath = notebookLink.split('_downloads')[1], - colabLink = "https://colab.research.google.com/github/pytorch/rl/blob/gh-pages/_downloads" + notebookDownloadPath; - - $("#google-colab-link").wrap("
"); - $("#download-notebook-link").wrap(""); - $("#github-view-link").wrap(""); -} else { - $(".pytorch-call-to-action-links").hide(); -} - -//This code handles the Expand/Hide toggle for the Docs/Tutorials left nav items - -$(document).ready(function() { - var caption = "#pytorch-left-menu p.caption"; - var collapseAdded = $(this).not("checked"); - $(caption).each(function () { - var menuName = this.innerText.replace(/[^\w\s]/gi, "").trim(); - $(this).find("span").addClass("checked"); - if (collapsedSections.includes(menuName) == true && collapseAdded && sessionStorage.getItem(menuName) !== "expand" || sessionStorage.getItem(menuName) == "collapse") { - $(this.firstChild).after("[ + ]"); - $(this.firstChild).after("[ - ]"); - $(this).next("ul").hide(); - } else if (collapsedSections.includes(menuName) == false && collapseAdded || sessionStorage.getItem(menuName) == "expand") { - $(this.firstChild).after("[ + ]"); - $(this.firstChild).after("[ - ]"); - } - }); - - $(".expand-menu").on("click", function () { - $(this).prev(".hide-menu").toggle(); - $(this).parent().next("ul").toggle(); - var menuName = $(this).parent().text().replace(/[^\w\s]/gi, "").trim(); - if (sessionStorage.getItem(menuName) == "collapse") { - sessionStorage.removeItem(menuName); - } - sessionStorage.setItem(menuName, "expand"); - toggleList(this); - }); - - $(".hide-menu").on("click", function () { - $(this).next(".expand-menu").toggle(); - $(this).parent().next("ul").toggle(); - var menuName = $(this).parent().text().replace(/[^\w\s]/gi, "").trim(); - if (sessionStorage.getItem(menuName) == "expand") { - sessionStorage.removeItem(menuName); - } - sessionStorage.setItem(menuName, "collapse"); - toggleList(this); - }); - - function toggleList(menuCommand) { - $(menuCommand).toggle(); - } -}); - -// Build an array from each tag that's present - -var tagList = $(".tutorials-card-container").map(function() { - return $(this).data("tags").split(",").map(function(item) { - return item.trim(); - }); -}).get(); - -function unique(value, index, self) { - return self.indexOf(value) == index && value != "" - } - -// Only return unique tags - -var tags = tagList.sort().filter(unique); - -// Add filter buttons to the top of the page for each tag - -function createTagMenu() { - tags.forEach(function(item){ - $(".tutorial-filter-menu").append("
" + item + "
") - }) -}; - -createTagMenu(); - -// Remove hyphens if they are present in the filter buttons - -$(".tags").each(function(){ - var tags = $(this).text().split(","); - tags.forEach(function(tag, i ) { - tags[i] = tags[i].replace(/-/, ' ') - }) - $(this).html(tags.join(", ")); -}); - -// Remove hyphens if they are present in the card body - -$(".tutorial-filter").each(function(){ - var tag = $(this).text(); - $(this).html(tag.replace(/-/, ' ')) -}) - -// Remove any empty p tags that Sphinx adds - -$("#tutorial-cards p").each(function(index, item) { - if(!$(item).text().trim()) { - $(item).remove(); - } -}); - -// Jump back to top on pagination click - -$(document).on("click", ".page", function() { - $('html, body').animate( - {scrollTop: $("#dropdown-filter-tags").position().top}, - 'slow' - ); -}); - -var link = $("a[href='intermediate/speech_command_recognition_with_torchaudio.html']"); - -if (link.text() == "SyntaxError") { - console.log("There is an issue with the intermediate/speech_command_recognition_with_torchaudio.html menu item."); - link.text("Speech Command Recognition with torchaudio"); -} - -$(".stars-outer > i").hover(function() { - $(this).prevAll().addBack().toggleClass("fas star-fill"); -}); - -$(".stars-outer > i").on("click", function() { - $(this).prevAll().each(function() { - $(this).addBack().addClass("fas star-fill"); - }); - - $(".stars-outer > i").each(function() { - $(this).unbind("mouseenter mouseleave").css({ - "pointer-events": "none" - }); - }); -}) - -$("#pytorch-side-scroll-right li a").on("click", function (e) { - var href = $(this).attr("href"); - $('html, body').stop().animate({ - scrollTop: $(href).offset().top - 100 - }, 850); - e.preventDefault; -}); - -var lastId, - topMenu = $("#pytorch-side-scroll-right"), - topMenuHeight = topMenu.outerHeight() + 1, - // All sidenav items - menuItems = topMenu.find("a"), - // Anchors for menu items - scrollItems = menuItems.map(function () { - var item = $(this).attr("href"); - if (item.length) { - return item; - } - }); - -$(window).scroll(function () { - var fromTop = $(this).scrollTop() + topMenuHeight; - var article = ".section"; - - $(article).each(function (i) { - var offsetScroll = $(this).offset().top - $(window).scrollTop(); - if ( - offsetScroll <= topMenuHeight + 200 && - offsetScroll >= topMenuHeight - 200 && - scrollItems[i] == "#" + $(this).attr("id") && - $(".hidden:visible") - ) { - $(menuItems).removeClass("side-scroll-highlight"); - $(menuItems[i]).addClass("side-scroll-highlight"); - } - }); -}); - - -},{"jquery":"jquery"}],"pytorch-sphinx-theme":[function(require,module,exports){ -require=(function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i wait) { - if (timeout) { - clearTimeout(timeout); - timeout = null; - } - previous = now; - result = func.apply(context, args); - if (!timeout) context = args = null; - } else if (!timeout && options.trailing !== false) { - timeout = setTimeout(later, remaining); - } - return result; - }; - }, - - closest: function (el, selector) { - var matchesFn; - - // find vendor prefix - ['matches','webkitMatchesSelector','mozMatchesSelector','msMatchesSelector','oMatchesSelector'].some(function(fn) { - if (typeof 
" + item + "
") - }) -}; - -createTagMenu(); - -// Remove hyphens if they are present in the filter buttons - -$(".tags").each(function(){ - var tags = $(this).text().split(","); - tags.forEach(function(tag, i ) { - tags[i] = tags[i].replace(/-/, ' ') - }) - $(this).html(tags.join(", ")); -}); - -// Remove hyphens if they are present in the card body - -$(".tutorial-filter").each(function(){ - var tag = $(this).text(); - $(this).html(tag.replace(/-/, ' ')) -}) - -// Remove any empty p tags that Sphinx adds - -$("#tutorial-cards p").each(function(index, item) { - if(!$(item).text().trim()) { - $(item).remove(); - } -}); - -// Jump back to top on pagination click - -$(document).on("click", ".page", function() { - $('html, body').animate( - {scrollTop: $("#dropdown-filter-tags").position().top}, - 'slow' - ); -}); - -var link = $("a[href='intermediate/speech_command_recognition_with_torchaudio.html']"); - -if (link.text() == "SyntaxError") { - console.log("There is an issue with the intermediate/speech_command_recognition_with_torchaudio.html menu item."); - link.text("Speech Command Recognition with torchaudio"); -} - -$(".stars-outer > i").hover(function() { - $(this).prevAll().addBack().toggleClass("fas star-fill"); -}); - -$(".stars-outer > i").on("click", function() { - $(this).prevAll().each(function() { - $(this).addBack().addClass("fas star-fill"); - }); - - $(".stars-outer > i").each(function() { - $(this).unbind("mouseenter mouseleave").css({ - "pointer-events": "none" - }); - }); -}) - -$("#pytorch-side-scroll-right li a").on("click", function (e) { - var href = $(this).attr("href"); - $('html, body').stop().animate({ - scrollTop: $(href).offset().top - 100 - }, 850); - e.preventDefault; -}); - -var lastId, - topMenu = $("#pytorch-side-scroll-right"), - topMenuHeight = topMenu.outerHeight() + 1, - // All sidenav items - menuItems = topMenu.find("a"), - // Anchors for menu items - scrollItems = menuItems.map(function () { - var item = $(this).attr("href"); - if (item.length) { - return item; - } - }); - -$(window).scroll(function () { - var fromTop = $(this).scrollTop() + topMenuHeight; - var article = ".section"; - - $(article).each(function (i) { - var offsetScroll = $(this).offset().top - $(window).scrollTop(); - if ( - offsetScroll <= topMenuHeight + 200 && - offsetScroll >= topMenuHeight - 200 && - scrollItems[i] == "#" + $(this).attr("id") && - $(".hidden:visible") - ) { - $(menuItems).removeClass("side-scroll-highlight"); - $(menuItems[i]).addClass("side-scroll-highlight"); - } - }); -}); - - -},{"jquery":"jquery"}],"pytorch-sphinx-theme":[function(require,module,exports){ -require=(function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i wait) { - if (timeout) { - clearTimeout(timeout); - timeout = null; - } - previous = now; - result = func.apply(context, args); - if (!timeout) context = args = null; - } else if (!timeout && options.trailing !== false) { - timeout = setTimeout(later, remaining); - } - return result; - }; - }, - - closest: function (el, selector) { - var matchesFn; - - // find vendor prefix - ['matches','webkitMatchesSelector','mozMatchesSelector','msMatchesSelector','oMatchesSelector'].some(function(fn) { - if (typeof 
document.body[fn] == 'function') { - matchesFn = fn; - return true; - } - return false; - }); - - var parent; - - // traverse parents - while (el) { - parent = el.parentElement; - if (parent && parent[matchesFn](selector)) { - return parent; - } - el = parent; - } - - return null; - }, - - // Modified from https://stackoverflow.com/a/18953277 - offset: function(elem) { - if (!elem) { - return; - } - - rect = elem.getBoundingClientRect(); - - // Make sure element is not hidden (display: none) or disconnected - if (rect.width || rect.height || elem.getClientRects().length) { - var doc = elem.ownerDocument; - var docElem = doc.documentElement; - - return { - top: rect.top + window.pageYOffset - docElem.clientTop, - left: rect.left + window.pageXOffset - docElem.clientLeft - }; - } - }, - - headersHeight: function() { - if (document.getElementById("pytorch-left-menu").classList.contains("make-fixed")) { - return document.getElementById("pytorch-page-level-bar").offsetHeight; - } else { - return document.getElementById("header-holder").offsetHeight + - document.getElementById("pytorch-page-level-bar").offsetHeight; - } - }, - - windowHeight: function() { - return window.innerHeight || - document.documentElement.clientHeight || - document.body.clientHeight; - } - } - - },{}],2:[function(require,module,exports){ - var cookieBanner = { - init: function() { - cookieBanner.bind(); - - var cookieExists = cookieBanner.cookieExists(); - - if (!cookieExists) { - cookieBanner.setCookie(); - cookieBanner.showCookieNotice(); - } - }, - - bind: function() { - $(".close-button").on("click", cookieBanner.hideCookieNotice); - }, - - cookieExists: function() { - var cookie = localStorage.getItem("returningPytorchUser"); - - if (cookie) { - return true; - } else { - return false; - } - }, - - setCookie: function() { - localStorage.setItem("returningPytorchUser", true); - }, - - showCookieNotice: function() { - $(".cookie-banner-wrapper").addClass("is-visible"); - }, - - hideCookieNotice: function() { - $(".cookie-banner-wrapper").removeClass("is-visible"); - } - }; - - $(function() { - cookieBanner.init(); - }); - - },{}],3:[function(require,module,exports){ - window.filterTags = { - bind: function() { - var options = { - valueNames: [{ data: ["tags"] }], - page: "6", - pagination: true - }; - - var tutorialList = new List("tutorial-cards", options); - - function filterSelectedTags(cardTags, selectedTags) { - return cardTags.some(function(tag) { - return selectedTags.some(function(selectedTag) { - return selectedTag == tag; - }); - }); - } - - function updateList() { - var selectedTags = []; - - $(".selected").each(function() { - selectedTags.push($(this).data("tag")); - }); - - tutorialList.filter(function(item) { - var cardTags; - - if (item.values().tags == null) { - cardTags = [""]; - } else { - cardTags = item.values().tags.split(","); - } - - if (selectedTags.length == 0) { - return true; - } else { - return filterSelectedTags(cardTags, selectedTags); - } - }); - } - - $(".filter-btn").on("click", function() { - if ($(this).data("tag") == "all") { - $(this).addClass("all-tag-selected"); - $(".filter").removeClass("selected"); - } else { - $(this).toggleClass("selected"); - $("[data-tag='all']").removeClass("all-tag-selected"); - } - - // If no tags are selected then highlight the 'All' tag - - if (!$(".selected")[0]) { - $("[data-tag='all']").addClass("all-tag-selected"); - } - - updateList(); - }); - } - }; - - },{}],4:[function(require,module,exports){ - // Modified from 
https://stackoverflow.com/a/32396543 - window.highlightNavigation = { - navigationListItems: document.querySelectorAll("#pytorch-right-menu li"), - sections: document.querySelectorAll(".pytorch-article .section"), - sectionIdTonavigationLink: {}, - - bind: function() { - if (!sideMenus.displayRightMenu) { - return; - }; - - for (var i = 0; i < highlightNavigation.sections.length; i++) { - var id = highlightNavigation.sections[i].id; - highlightNavigation.sectionIdTonavigationLink[id] = - document.querySelectorAll('#pytorch-right-menu li a[href="#' + id + '"]')[0]; - } - - $(window).scroll(utilities.throttle(highlightNavigation.highlight, 100)); - }, - - highlight: function() { - var rightMenu = document.getElementById("pytorch-right-menu"); - - // If right menu is not on the screen don't bother - if (rightMenu.offsetWidth === 0 && rightMenu.offsetHeight === 0) { - return; - } - - var scrollPosition = utilities.scrollTop(); - var OFFSET_TOP_PADDING = 25; - var offset = document.getElementById("header-holder").offsetHeight + - document.getElementById("pytorch-page-level-bar").offsetHeight + - OFFSET_TOP_PADDING; - - var sections = highlightNavigation.sections; - - for (var i = (sections.length - 1); i >= 0; i--) { - var currentSection = sections[i]; - var sectionTop = utilities.offset(currentSection).top; - - if (scrollPosition >= sectionTop - offset) { - var navigationLink = highlightNavigation.sectionIdTonavigationLink[currentSection.id]; - var navigationListItem = utilities.closest(navigationLink, "li"); - - if (navigationListItem && !navigationListItem.classList.contains("active")) { - for (var i = 0; i < highlightNavigation.navigationListItems.length; i++) { - var el = highlightNavigation.navigationListItems[i]; - if (el.classList.contains("active")) { - el.classList.remove("active"); - } - } - - navigationListItem.classList.add("active"); - - // Scroll to active item. Not a requested feature but we could revive it. Needs work. - - // var menuTop = $("#pytorch-right-menu").position().top; - // var itemTop = navigationListItem.getBoundingClientRect().top; - // var TOP_PADDING = 20 - // var newActiveTop = $("#pytorch-side-scroll-right").scrollTop() + itemTop - menuTop - TOP_PADDING; - - // $("#pytorch-side-scroll-right").animate({ - // scrollTop: newActiveTop - // }, 100); - } - - break; - } - } - } - }; - - },{}],5:[function(require,module,exports){ - window.mainMenuDropdown = { - bind: function() { - $("[data-toggle='ecosystem-dropdown']").on("click", function() { - toggleDropdown($(this).attr("data-toggle")); - }); - - $("[data-toggle='resources-dropdown']").on("click", function() { - toggleDropdown($(this).attr("data-toggle")); - }); - - function toggleDropdown(menuToggle) { - var showMenuClass = "show-menu"; - var menuClass = "." 
+ menuToggle + "-menu"; - - if ($(menuClass).hasClass(showMenuClass)) { - $(menuClass).removeClass(showMenuClass); - } else { - $("[data-toggle=" + menuToggle + "].show-menu").removeClass( - showMenuClass - ); - $(menuClass).addClass(showMenuClass); - } - } - } - }; - - },{}],6:[function(require,module,exports){ - window.mobileMenu = { - bind: function() { - $("[data-behavior='open-mobile-menu']").on('click', function(e) { - e.preventDefault(); - $(".mobile-main-menu").addClass("open"); - $("body").addClass('no-scroll'); - - mobileMenu.listenForResize(); - }); - - $("[data-behavior='close-mobile-menu']").on('click', function(e) { - e.preventDefault(); - mobileMenu.close(); - }); - }, - - listenForResize: function() { - $(window).on('resize.ForMobileMenu', function() { - if ($(this).width() > 768) { - mobileMenu.close(); - } - }); - }, - - close: function() { - $(".mobile-main-menu").removeClass("open"); - $("body").removeClass('no-scroll'); - $(window).off('resize.ForMobileMenu'); - } - }; - - },{}],7:[function(require,module,exports){ - window.mobileTOC = { - bind: function() { - $("[data-behavior='toggle-table-of-contents']").on("click", function(e) { - e.preventDefault(); - - var $parent = $(this).parent(); - - if ($parent.hasClass("is-open")) { - $parent.removeClass("is-open"); - $(".pytorch-left-menu").slideUp(200, function() { - $(this).css({display: ""}); - }); - } else { - $parent.addClass("is-open"); - $(".pytorch-left-menu").slideDown(200); - } - }); - } - } - - },{}],8:[function(require,module,exports){ - window.pytorchAnchors = { - bind: function() { - // Replace Sphinx-generated anchors with anchorjs ones - $(".headerlink").text(""); - - window.anchors.add(".pytorch-article .headerlink"); - - $(".anchorjs-link").each(function() { - var $headerLink = $(this).closest(".headerlink"); - var href = $headerLink.attr("href"); - var clone = this.outerHTML; - - $clone = $(clone).attr("href", href); - $headerLink.before($clone); - $headerLink.remove(); - }); - } - }; - - },{}],9:[function(require,module,exports){ - // Modified from https://stackoverflow.com/a/13067009 - // Going for a JS solution to scrolling to an anchor so we can benefit from - // less hacky css and smooth scrolling. - - window.scrollToAnchor = { - bind: function() { - var document = window.document; - var history = window.history; - var location = window.location - var HISTORY_SUPPORT = !!(history && history.pushState); - - var anchorScrolls = { - ANCHOR_REGEX: /^#[^ ]+$/, - offsetHeightPx: function() { - var OFFSET_HEIGHT_PADDING = 20; - // TODO: this is a little janky. We should try to not rely on JS for this - return utilities.headersHeight() + OFFSET_HEIGHT_PADDING; - }, - - /** - * Establish events, and fix initial scroll position if a hash is provided. - */ - init: function() { - this.scrollToCurrent(); - // This interferes with clicks below it, causing a double fire - // $(window).on('hashchange', $.proxy(this, 'scrollToCurrent')); - $('body').on('click', 'a', $.proxy(this, 'delegateAnchors')); - $('body').on('click', '#pytorch-right-menu li span', $.proxy(this, 'delegateSpans')); - }, - - /** - * Return the offset amount to deduct from the normal scroll position. - * Modify as appropriate to allow for dynamic calculations - */ - getFixedOffset: function() { - return this.offsetHeightPx(); - }, - - /** - * If the provided href is an anchor which resolves to an element on the - * page, scroll to it. - * @param {String} href - * @return {Boolean} - Was the href an anchor. 
- */ - scrollIfAnchor: function(href, pushToHistory) { - var match, anchorOffset; - - if(!this.ANCHOR_REGEX.test(href)) { - return false; - } - - match = document.getElementById(href.slice(1)); - - if(match) { - var anchorOffset = $(match).offset().top - this.getFixedOffset(); - - $('html, body').scrollTop(anchorOffset); - - // Add the state to history as-per normal anchor links - if(HISTORY_SUPPORT && pushToHistory) { - history.pushState({}, document.title, location.pathname + href); - } - } - - return !!match; - }, - - /** - * Attempt to scroll to the current location's hash. - */ - scrollToCurrent: function(e) { - if(this.scrollIfAnchor(window.location.hash) && e) { - e.preventDefault(); - } - }, - - delegateSpans: function(e) { - var elem = utilities.closest(e.target, "a"); - - if(this.scrollIfAnchor(elem.getAttribute('href'), true)) { - e.preventDefault(); - } - }, - - /** - * If the click event's target was an anchor, fix the scroll position. - */ - delegateAnchors: function(e) { - var elem = e.target; - - if(this.scrollIfAnchor(elem.getAttribute('href'), true)) { - e.preventDefault(); - } - } - }; - - $(document).ready($.proxy(anchorScrolls, 'init')); - } - }; - - },{}],10:[function(require,module,exports){ - window.sideMenus = { - rightMenuIsOnScreen: function() { - return document.getElementById("pytorch-content-right").offsetParent !== null; - }, - - isFixedToBottom: false, - - bind: function() { - sideMenus.handleLeftMenu(); - - var rightMenuLinks = document.querySelectorAll("#pytorch-right-menu li"); - var rightMenuHasLinks = rightMenuLinks.length > 1; - - if (!rightMenuHasLinks) { - for (var i = 0; i < rightMenuLinks.length; i++) { - rightMenuLinks[i].style.display = "none"; - } - } - - if (rightMenuHasLinks) { - // Don't show the Shortcuts menu title text unless there are menu items - document.getElementById("pytorch-shortcuts-wrapper").style.display = "block"; - - // We are hiding the titles of the pages in the right side menu but there are a few - // pages that include other pages in the right side menu (see 'torch.nn' in the docs) - // so if we exclude those it looks confusing. 
Here we add a 'title-link' class to these - // links so we can exclude them from normal right side menu link operations - var titleLinks = document.querySelectorAll( - "#pytorch-right-menu #pytorch-side-scroll-right \ - > ul > li > a.reference.internal" - ); - - for (var i = 0; i < titleLinks.length; i++) { - var link = titleLinks[i]; - - link.classList.add("title-link"); - - if ( - link.nextElementSibling && - link.nextElementSibling.tagName === "UL" && - link.nextElementSibling.children.length > 0 - ) { - link.classList.add("has-children"); - } - } - - // Add + expansion signifiers to normal right menu links that have sub menus - var menuLinks = document.querySelectorAll( - "#pytorch-right-menu ul li ul li a.reference.internal" - ); - - for (var i = 0; i < menuLinks.length; i++) { - if ( - menuLinks[i].nextElementSibling && - menuLinks[i].nextElementSibling.tagName === "UL" - ) { - menuLinks[i].classList.add("not-expanded"); - } - } - - // If a hash is present on page load recursively expand menu items leading to selected item - var linkWithHash = - document.querySelector( - "#pytorch-right-menu a[href=\"" + window.location.hash + "\"]" - ); - - if (linkWithHash) { - // Expand immediate sibling list if present - if ( - linkWithHash.nextElementSibling && - linkWithHash.nextElementSibling.tagName === "UL" && - linkWithHash.nextElementSibling.children.length > 0 - ) { - linkWithHash.nextElementSibling.style.display = "block"; - linkWithHash.classList.add("expanded"); - } - - // Expand ancestor lists if any - sideMenus.expandClosestUnexpandedParentList(linkWithHash); - } - - // Bind click events on right menu links - $("#pytorch-right-menu a.reference.internal").on("click", function() { - if (this.classList.contains("expanded")) { - this.nextElementSibling.style.display = "none"; - this.classList.remove("expanded"); - this.classList.add("not-expanded"); - } else if (this.classList.contains("not-expanded")) { - this.nextElementSibling.style.display = "block"; - this.classList.remove("not-expanded"); - this.classList.add("expanded"); - } - }); - - sideMenus.handleRightMenu(); - } - - $(window).on('resize scroll', function(e) { - sideMenus.handleNavBar(); - - sideMenus.handleLeftMenu(); - - if (sideMenus.rightMenuIsOnScreen()) { - sideMenus.handleRightMenu(); - } - }); - }, - - leftMenuIsFixed: function() { - return document.getElementById("pytorch-left-menu").classList.contains("make-fixed"); - }, - - handleNavBar: function() { - var mainHeaderHeight = document.getElementById('header-holder').offsetHeight; - - // If we are scrolled past the main navigation header fix the sub menu bar to top of page - if (utilities.scrollTop() >= mainHeaderHeight) { - document.getElementById("pytorch-left-menu").classList.add("make-fixed"); - document.getElementById("pytorch-page-level-bar").classList.add("left-menu-is-fixed"); - } else { - document.getElementById("pytorch-left-menu").classList.remove("make-fixed"); - document.getElementById("pytorch-page-level-bar").classList.remove("left-menu-is-fixed"); - } - }, - - expandClosestUnexpandedParentList: function (el) { - var closestParentList = utilities.closest(el, "ul"); - - if (closestParentList) { - var closestParentLink = closestParentList.previousElementSibling; - var closestParentLinkExists = closestParentLink && - closestParentLink.tagName === "A" && - closestParentLink.classList.contains("reference"); - - if (closestParentLinkExists) { - // Don't add expansion class to any title links - if (closestParentLink.classList.contains("title-link")) { - 
return; - } - - closestParentList.style.display = "block"; - closestParentLink.classList.remove("not-expanded"); - closestParentLink.classList.add("expanded"); - sideMenus.expandClosestUnexpandedParentList(closestParentLink); - } - } - }, - - handleLeftMenu: function () { - var windowHeight = utilities.windowHeight(); - var topOfFooterRelativeToWindow = document.getElementById("docs-tutorials-resources").getBoundingClientRect().top; - - if (topOfFooterRelativeToWindow >= windowHeight) { - document.getElementById("pytorch-left-menu").style.height = "100%"; - } else { - var howManyPixelsOfTheFooterAreInTheWindow = windowHeight - topOfFooterRelativeToWindow; - var leftMenuDifference = howManyPixelsOfTheFooterAreInTheWindow; - document.getElementById("pytorch-left-menu").style.height = (windowHeight - leftMenuDifference) + "px"; - } - }, - - handleRightMenu: function() { - var rightMenuWrapper = document.getElementById("pytorch-content-right"); - var rightMenu = document.getElementById("pytorch-right-menu"); - var rightMenuList = rightMenu.getElementsByTagName("ul")[0]; - var article = document.getElementById("pytorch-article"); - var articleHeight = article.offsetHeight; - var articleBottom = utilities.offset(article).top + articleHeight; - var mainHeaderHeight = document.getElementById('header-holder').offsetHeight; - - if (utilities.scrollTop() < mainHeaderHeight) { - rightMenuWrapper.style.height = "100%"; - rightMenu.style.top = 0; - rightMenu.classList.remove("scrolling-fixed"); - rightMenu.classList.remove("scrolling-absolute"); - } else { - if (rightMenu.classList.contains("scrolling-fixed")) { - var rightMenuBottom = - utilities.offset(rightMenuList).top + rightMenuList.offsetHeight; - - if (rightMenuBottom >= articleBottom) { - rightMenuWrapper.style.height = articleHeight + mainHeaderHeight + "px"; - rightMenu.style.top = utilities.scrollTop() - mainHeaderHeight + "px"; - rightMenu.classList.add("scrolling-absolute"); - rightMenu.classList.remove("scrolling-fixed"); - } - } else { - rightMenuWrapper.style.height = articleHeight + mainHeaderHeight + "px"; - rightMenu.style.top = - articleBottom - mainHeaderHeight - rightMenuList.offsetHeight + "px"; - rightMenu.classList.add("scrolling-absolute"); - } - - if (utilities.scrollTop() < articleBottom - rightMenuList.offsetHeight) { - rightMenuWrapper.style.height = "100%"; - rightMenu.style.top = ""; - rightMenu.classList.remove("scrolling-absolute"); - rightMenu.classList.add("scrolling-fixed"); - } - } - - var rightMenuSideScroll = document.getElementById("pytorch-side-scroll-right"); - var sideScrollFromWindowTop = rightMenuSideScroll.getBoundingClientRect().top; - - rightMenuSideScroll.style.height = utilities.windowHeight() - sideScrollFromWindowTop + "px"; - } - }; - - },{}],11:[function(require,module,exports){ - var jQuery = (typeof(window) != 'undefined') ? window.jQuery : require('jquery'); - - // Sphinx theme nav state - function ThemeNav () { - - var nav = { - navBar: null, - win: null, - winScroll: false, - winResize: false, - linkScroll: false, - winPosition: 0, - winHeight: null, - docHeight: null, - isRunning: false - }; - - nav.enable = function (withStickyNav) { - var self = this; - - // TODO this can likely be removed once the theme javascript is broken - // out from the RTD assets. This just ensures old projects that are - // calling `enable()` get the sticky menu on by default. All other cals - // to `enable` should include an argument for enabling the sticky menu. 
") - }) - }; - - createTagMenu(); - - // Remove hyphens if they are present in the filter buttons - - $(".tags").each(function(){ - var tags = $(this).text().split(","); - tags.forEach(function(tag, i ) { - tags[i] = tags[i].replace(/-/, ' ') - }) - $(this).html(tags.join(", ")); - }); - - // Remove hyphens if they are present in the card body - - $(".tutorial-filter").each(function(){ - var tag = $(this).text(); - $(this).html(tag.replace(/-/, ' ')) - }) - - // Remove any empty p tags that Sphinx adds - - $("#tutorial-cards p").each(function(index, item) { - if(!$(item).text().trim()) { - $(item).remove(); - } - }); - - // Jump back to top on pagination click - - $(document).on("click", ".page", function() { - $('html, body').animate( - {scrollTop: $("#dropdown-filter-tags").position().top}, - 'slow' - ); - }); - - var link = $("a[href='intermediate/speech_command_recognition_with_torchaudio.html']"); - - if (link.text() == "SyntaxError") { - console.log("There is an issue with the intermediate/speech_command_recognition_with_torchaudio.html menu item."); - link.text("Speech Command Recognition with torchaudio"); - } - - $(".stars-outer > i").hover(function() { - $(this).prevAll().addBack().toggleClass("fas star-fill"); - }); - - $(".stars-outer > i").on("click", function() { - $(this).prevAll().each(function() { - $(this).addBack().addClass("fas star-fill"); - }); - - $(".stars-outer > i").each(function() { - $(this).unbind("mouseenter mouseleave").css({ - "pointer-events": "none" - }); - }); - }) - - $("#pytorch-side-scroll-right li a").on("click", function (e) { - var href = $(this).attr("href"); - $('html, body').stop().animate({ - scrollTop: $(href).offset().top - 100 - }, 850); - e.preventDefault; - }); - - var lastId, - topMenu = $("#pytorch-side-scroll-right"), - topMenuHeight = topMenu.outerHeight() + 1, - // All sidenav items - menuItems = topMenu.find("a"), - // Anchors for menu items - scrollItems = menuItems.map(function () { - var item = $(this).attr("href"); - if (item.length) { - return item; - } - }); - - $(window).scroll(function () { - var fromTop = $(this).scrollTop() + topMenuHeight; - var article = ".section"; - - $(article).each(function (i) { - var offsetScroll = $(this).offset().top - $(window).scrollTop(); - if ( - offsetScroll <= topMenuHeight + 200 && - offsetScroll >= topMenuHeight - 200 && - scrollItems[i] == "#" + $(this).attr("id") && - $(".hidden:visible") - ) { - $(menuItems).removeClass("side-scroll-highlight"); - $(menuItems[i]).addClass("side-scroll-highlight"); - } - }); - }); - - },{"jquery":"jquery"}]},{},[1,2,3,4,5,6,7,8,9,10,11]); - -},{"jquery":"jquery"}]},{},[1,2,3,4,5,6,7,8,9,10,11]); - -},{"jquery":"jquery"}]},{},[1,2,3,4,5,6,7,8,9,10,11]); - -},{"jquery":"jquery"}]},{},[1,2,3,4,5,6,7,8,9,10,11]); +},{"jquery":"jquery"}]},{},[1,2,3,4,5,6,7,8,9,10,"pytorch-sphinx-theme"]); From 66da33630d2cc5de794fe1f823f819c6d42b4cc5 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 5 Apr 2023 16:17:49 +0100 Subject: [PATCH 84/89] amend --- torchrl/modules/tensordict_module/actors.py | 143 +- torchrl/objectives/dqn.py | 2 + torchrl/objectives/sac.py | 2 + torchrl/objectives/value/advantages.py | 8 + torchrl/trainers/trainers.py | 5 +- tutorials/sphinx-tutorials/coding_dqn.py | 1399 ++++++++++--------- 6 files changed, 796 insertions(+), 763 deletions(-) diff --git a/torchrl/modules/tensordict_module/actors.py b/torchrl/modules/tensordict_module/actors.py index 635fc90ca21..7b9b8ef53a1 100644 --- a/torchrl/modules/tensordict_module/actors.py +++ 
b/torchrl/modules/tensordict_module/actors.py @@ -715,7 +715,8 @@ def __init__( class ActorValueOperator(SafeSequential): """Actor-value operator. - This class wraps together an actor and a value model that share a common observation embedding network: + This class wraps together an actor and a value model that share a common + observation embedding network: .. aafig:: :aspect: 60 @@ -723,22 +724,30 @@ class ActorValueOperator(SafeSequential): :proportional: :textual: - +-------------+ - |"Observation"| - +-------------+ - | - v - +--------------+ - |"hidden state"| - +--------------+ - | | | - v | v - actor | critic - | | | - v | v - +--------+|+-------+ - |"action"|||"value"| - +--------+|+-------+ + +---------------+ + |Observation (s)| + +---------------+ + | + v + common + | + v + +------------------+ + | Hidden state | + +------------------+ + | | + v v + actor critic + | | + v v + +-------------+ +------------+ + |Action (a(s))| |Value (V(s))| + +-------------+ +------------+ + + .. note:: + For a similar class that returns an action and a Quality value :math:`Q(s, a)` + see :class:`~.ActorCriticOperator`. For a version without common embeddig + refet to :class:`~.ActorCriticWrapper`. To facilitate the workflow, this class comes with a get_policy_operator() and get_value_operator() methods, which will both return a stand-alone TDModule with the dedicated functionality. @@ -755,17 +764,13 @@ class ActorValueOperator(SafeSequential): >>> import torch >>> from tensordict import TensorDict >>> from torchrl.modules import ProbabilisticActor, SafeModule - >>> from torchrl.data import UnboundedContinuousTensorSpec, BoundedTensorSpec >>> from torchrl.modules import ValueOperator, TanhNormal, ActorValueOperator, NormalParamWrapper - >>> spec_hidden = UnboundedContinuousTensorSpec(4) >>> module_hidden = torch.nn.Linear(4, 4) >>> td_module_hidden = SafeModule( ... module=module_hidden, - ... spec=spec_hidden, ... in_keys=["observation"], ... out_keys=["hidden"], ... ) - >>> spec_action = BoundedTensorSpec(-1, 1, torch.Size([8])) >>> module_action = TensorDictModule( ... NormalParamWrapper(torch.nn.Linear(4, 8)), ... in_keys=["hidden"], @@ -773,7 +778,6 @@ class ActorValueOperator(SafeSequential): ... ) >>> td_module_action = ProbabilisticActor( ... module=module_action, - ... spec=spec_action, ... in_keys=["loc", "scale"], ... out_keys=["action"], ... distribution_class=TanhNormal, @@ -854,7 +858,8 @@ def get_value_operator(self) -> SafeSequential: class ActorCriticOperator(ActorValueOperator): """Actor-critic operator. - This class wraps together an actor and a value model that share a common observation embedding network: + This class wraps together an actor and a value model that share a common + observation embedding network: .. aafig:: :aspect: 60 @@ -862,51 +867,58 @@ class ActorCriticOperator(ActorValueOperator): :proportional: :textual: - +-----------+ - |Observation| - +-----------+ - | - v - actor - | - v - +------+ - |action| --> critic - +------+ | - v - +-----+ - |value| - +-----+ + +---------------+ + |Observation (s)| + +---------------+ + | + v + common + | + v + +------------------+ + | Hidden state | + +------------------+ + | | + v v + actor ------> critic + | | + v v + +-------------+ +----------------+ + |Action (a(s))| |Quality (Q(s,a))| + +-------------+ +----------------+ + + .. note:: + For a similar class that returns an action and a state-value :math:`V(s)` + see :class:`~.ActorValueOperator`. 
+ To facilitate the workflow, this class comes with a get_policy_operator() method, which will both return a stand-alone TDModule with the dedicated functionality. The get_critic_operator will return the parent object, as the value is computed based on the policy output. Args: - common_operator (TensorDictModule): a common operator that reads observations and produces a hidden variable - policy_operator (TensorDictModule): a policy operator that reads the hidden variable and returns an action - value_operator (TensorDictModule): a value operator, that reads the hidden variable and returns a value + common_operator (TensorDictModule): a common operator that reads + observations and produces a hidden variable + policy_operator (TensorDictModule): a policy operator that reads the + hidden variable and returns an action + value_operator (TensorDictModule): a value operator, that reads the + hidden variable and returns a value Examples: >>> import torch >>> from tensordict import TensorDict >>> from torchrl.modules import ProbabilisticActor - >>> from torchrl.data import UnboundedContinuousTensorSpec, BoundedTensorSpec >>> from torchrl.modules import ValueOperator, TanhNormal, ActorCriticOperator, NormalParamWrapper, MLP - >>> spec_hidden = UnboundedContinuousTensorSpec(4) >>> module_hidden = torch.nn.Linear(4, 4) >>> td_module_hidden = SafeModule( ... module=module_hidden, - ... spec=spec_hidden, ... in_keys=["observation"], ... out_keys=["hidden"], ... ) - >>> spec_action = BoundedTensorSpec(-1, 1, torch.Size([8])) >>> module_action = NormalParamWrapper(torch.nn.Linear(4, 8)) >>> module_action = TensorDictModule(module_action, in_keys=["hidden"], out_keys=["loc", "scale"]) >>> td_module_action = ProbabilisticActor( ... module=module_action, - ... spec=spec_action, ... in_keys=["loc", "scale"], ... out_keys=["action"], ... distribution_class=TanhNormal, @@ -964,8 +976,17 @@ class ActorCriticOperator(ActorValueOperator): """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__( + self, + common_operator: TensorDictModule, + policy_operator: TensorDictModule, + value_operator: TensorDictModule, + ): + super().__init__( + common_operator, + policy_operator, + value_operator, + ) if self[2].out_keys[0] == "state_value": raise RuntimeError( "Value out_key is state_value, which may lead to errors in downstream usages" @@ -998,17 +1019,18 @@ class ActorCriticWrapper(SafeSequential): :proportional: :textual: - +-----------+ - |Observation| - +-----------+ - | | | - v | v - actor | critic - | | | - v | v - +------+|+-------+ - |action||| value | - +------+|+-------+ + +---------------+ + |Observation (s)| + +---------------+ + | | | + v | v + actor | critic + | | | + v | v + +-------------+|+------------+ + |Action (a(s))|||Value (V(s))| + +-------------+|+------------+ + To facilitate the workflow, this class comes with a get_policy_operator() and get_value_operator() methods, which will both return a stand-alone TDModule with the dedicated functionality. @@ -1021,7 +1043,6 @@ class ActorCriticWrapper(SafeSequential): >>> import torch >>> from tensordict import TensorDict >>> from tensordict.nn import TensorDictModule - >>> from torchrl.data import UnboundedContinuousTensorSpec, BoundedTensorSpec >>> from torchrl.modules import ( ... ActorCriticWrapper, ... ProbabilisticActor, @@ -1029,7 +1050,6 @@ class ActorCriticWrapper(SafeSequential): ... TanhNormal, ... ValueOperator, ... 
) - >>> action_spec = BoundedTensorSpec(-1, 1, torch.Size([8])) >>> action_module = TensorDictModule( ... NormalParamWrapper(torch.nn.Linear(4, 8)), ... in_keys=["observation"], @@ -1037,7 +1057,6 @@ class ActorCriticWrapper(SafeSequential): ... ) >>> td_module_action = ProbabilisticActor( ... module=action_module, - ... spec=action_spec, ... in_keys=["loc", "scale"], ... distribution_class=TanhNormal, ... return_log_prob=True, diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py index e584b894ed7..70957785fa7 100644 --- a/torchrl/objectives/dqn.py +++ b/torchrl/objectives/dqn.py @@ -189,10 +189,12 @@ class DistributionalDQNLoss(LossModule): value_network (DistributionalQValueActor or nn.Module): the distributional Q value operator. gamma (scalar): a discount factor for return computation. + .. note:: Unlike :class:`DQNLoss`, this class does not currently support custom value functions. The next value estimation is always bootstrapped. + delay_value (bool): whether to duplicate the value network into a new target value network to create double DQN """ diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py index 8177c2f393c..521777fec60 100644 --- a/torchrl/objectives/sac.py +++ b/torchrl/objectives/sac.py @@ -49,9 +49,11 @@ class SACLoss(LossModule): This module typically outputs a ``"state_action_value"`` entry. value_network (TensorDictModule, optional): V(s) parametric model. This module typically outputs a ``"state_value"`` entry. + .. note:: If not provided, the second version of SAC is assumed, where only the Q-Value network is needed. + num_qvalue_nets (integer, optional): number of Q-Value networks used. Defaults to ``2``. priority_key (str, optional): tensordict key where to write the diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 14799118990..e6e42fef55f 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -132,10 +132,12 @@ class TD0Estimator(ValueEstimatorBase): before the TD is computed. differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: The proper way to make the function call non-differentiable is to decorate it in a `torch.no_grad()` context manager/decorator or pass detached parameters for functional modules. + advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. @@ -319,10 +321,12 @@ class TD1Estimator(ValueEstimatorBase): before the TD is computed. differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: The proper way to make the function call non-differentiable is to decorate it in a `torch.no_grad()` context manager/decorator or pass detached parameters for functional modules. + advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. @@ -506,10 +510,12 @@ class TDLambdaEstimator(ValueEstimatorBase): before the TD is computed. differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. 
note:: The proper way to make the function call non-differentiable is to decorate it in a `torch.no_grad()` context manager/decorator or pass detached parameters for functional modules. + vectorized (bool, optional): whether to use the vectorized version of the lambda return. Default is `True`. advantage_key (str or tuple of str, optional): the key of the advantage entry. @@ -710,10 +716,12 @@ class GAE(ValueEstimatorBase): Default is ``False``. differentiable (bool, optional): if ``True``, gradients are propagated through the computation of the value function. Default is ``False``. + .. note:: The proper way to make the function call non-differentiable is to decorate it in a `torch.no_grad()` context manager/decorator or pass detached parameters for functional modules. + advantage_key (str or tuple of str, optional): the key of the advantage entry. Defaults to "advantage". value_target_key (str or tuple of str, optional): the key of the advantage entry. diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 070679acd52..4a04acd4c98 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -438,7 +438,6 @@ def train(self): for batch in self.collector: batch = self._process_batch_hook(batch) - self._pre_steps_log_hook(batch) current_frames = ( batch.get(("collector", "mask"), torch.tensor(batch.numel())) .sum() @@ -446,6 +445,7 @@ def train(self): * self.frame_skip ) self.collected_frames += current_frames + self._pre_steps_log_hook(batch) if self.collected_frames > self.collector.init_random_frames: self.optim_steps(batch) @@ -506,7 +506,7 @@ def _log(self, log_pbar=False, **kwargs) -> None: collected_frames = self.collected_frames for key, item in kwargs.items(): self._log_dict[key].append(item) - + print(f"collected_frames {collected_frames}, self._last_log.get({key}, 0) {self._last_log.get(key, 0)}, self._log_interval {self._log_interval}") if (collected_frames - self._last_log.get(key, 0)) > self._log_interval: self._last_log[key] = collected_frames _log = True @@ -514,6 +514,7 @@ def _log(self, log_pbar=False, **kwargs) -> None: _log = False method = LOGGER_METHODS.get(key, "log_scalar") if _log and self.logger is not None: + print("logging!", key, self.logger.experiment.log_dir) getattr(self.logger, method)(key, item, step=collected_frames) if method == "log_scalar" and self.progress_bar and log_pbar: if isinstance(item, torch.Tensor): diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 47268647e71..3d0eef5adc9 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -1,710 +1,711 @@ -# -*- coding: utf-8 -*- -""" -TorchRL trainer: A DQN example -============================== -**Author**: `Vincent Moens `_ - -""" - -############################################################################## -# TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle -# your training loop. The trainer executes a nested loop where the outer loop -# is the data collection and the inner loop consumes this data or some data -# retrieved from the replay buffer to train the model. -# At various points in this training loop, hooks can be attached and executed at -# given intervals. -# -# In this tutorial, we will be using the trainer class to train a DQN algorithm -# to solve the CartPole task from scratch. 
-# -# Main takeaways: -# -# - Building a trainer with its essential components: data collector, loss -# module, replay buffer and optimizer. -# - Adding hooks to a trainer, such as loggers, target network updaters and such. -# -# The trainer is fully customisable and offers a large set of functionalities. -# The tutorial is organised around its construction. -# We will be detailing how to build each of the components of the library first, -# and then put the pieces together using the :class:`torchrl.trainers.Trainer` -# class. -# -# Along the road, we will also focus on some other aspects of the library: -# -# - how to build an environment in TorchRL, including transforms (e.g. data -# normalization, frame concatenation, resizing and turning to grayscale) -# and parallel execution. Unlike what we did in the -# `DDPG tutorial `_, we -# will normalize the pixels and not the state vector. -# - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor -# that estimates the action values and picks up the action with the highest -# estimated return; -# - how to collect data from your environment efficiently and store them -# in a replay buffer; -# - how to use multi-step, a simple preprocessing step for off-policy algorithms; -# - and finally how to evaluate your model. -# -# **Prerequisites**: We encourage you to get familiar with torchrl through the -# `PPO tutorial `_ first. -# -# DQN -# --- -# -# DQN (`Deep Q-Learning `_) was -# the founding work in deep reinforcement learning. -# -# On a high level, the algorithm is quite simple: Q-learning consists in -# learning a table of state-action values in such a way that, when -# encountering any particular state, we know which action to pick just by -# searching for the one with the highest value. This simple setting -# requires the actions and states to be -# discrete, otherwise a lookup table cannot be built. -# -# DQN uses a neural network that encodes a map from the state-action space to -# a value (scalar) space, which amortizes the cost of storing and exploring all -# the possible state-action combinations: if a state has not been seen in the -# past, we can still pass it in conjunction with the various actions available -# through our neural network and get an interpolated value for each of the -# actions available. -# -# We will solve the classic control problem of the cart pole. From the -# Gymnasium doc from where this environment is retrieved: -# -# | A pole is attached by an un-actuated joint to a cart, which moves along a -# | frictionless track. The pendulum is placed upright on the cart and the goal -# | is to balance the pole by applying forces in the left and right direction -# | on the cart. -# -# .. figure:: /_static/img/cartpole_demo.gif -# :alt: Cart Pole -# -# We do not aim at giving a SOTA implementation of the algorithm, but rather -# to provide a high-level illustration of TorchRL features in the context -# of this algorithm. 
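###############################################################################
# As a minimal sketch of the idea described above (plain PyTorch, with made-up
# sizes rather than the tutorial's actual model), a Q-network maps an
# observation to one value per action, and the greedy policy simply picks the
# argmax:

import torch
from torch import nn

num_obs, num_actions = 4, 2  # hypothetical CartPole-like dimensions
q_net = nn.Sequential(nn.Linear(num_obs, 64), nn.ReLU(), nn.Linear(64, num_actions))

obs = torch.randn(num_obs)              # a fake observation
action_values = q_net(obs)              # one value per available action
greedy_action = action_values.argmax()  # pick the action with the highest value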
- -# sphinx_gallery_start_ignore -import tempfile -import warnings - -warnings.filterwarnings("ignore") -# sphinx_gallery_end_ignore - -import os -import uuid - -import torch -from torch import nn -from torchrl.collectors import MultiaSyncDataCollector -from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer -from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter -from torchrl.envs.libs.gym import GymEnv -from torchrl.envs.transforms import ( - CatFrames, - Compose, - GrayScale, - ObservationNorm, - Resize, - ToTensorImage, - TransformedEnv, -) -from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor - -from torchrl.objectives import DQNLoss, SoftUpdate -from torchrl.record.loggers.csv import CSVLogger -from torchrl.trainers import ( - LogReward, - Recorder, - ReplayBufferTrainer, - Trainer, - UpdateWeights, -) - - -def is_notebook() -> bool: - try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython +if __name__ == "__main__": + # -*- coding: utf-8 -*- + """ + TorchRL trainer: A DQN example + ============================== + **Author**: `Vincent Moens `_ + + """ + + ############################################################################## + # TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle + # your training loop. The trainer executes a nested loop where the outer loop + # is the data collection and the inner loop consumes this data or some data + # retrieved from the replay buffer to train the model. + # At various points in this training loop, hooks can be attached and executed at + # given intervals. + # + # In this tutorial, we will be using the trainer class to train a DQN algorithm + # to solve the CartPole task from scratch. + # + # Main takeaways: + # + # - Building a trainer with its essential components: data collector, loss + # module, replay buffer and optimizer. + # - Adding hooks to a trainer, such as loggers, target network updaters and such. + # + # The trainer is fully customisable and offers a large set of functionalities. + # The tutorial is organised around its construction. + # We will be detailing how to build each of the components of the library first, + # and then put the pieces together using the :class:`torchrl.trainers.Trainer` + # class. + # + # Along the road, we will also focus on some other aspects of the library: + # + # - how to build an environment in TorchRL, including transforms (e.g. data + # normalization, frame concatenation, resizing and turning to grayscale) + # and parallel execution. Unlike what we did in the + # `DDPG tutorial `_, we + # will normalize the pixels and not the state vector. + # - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor + # that estimates the action values and picks up the action with the highest + # estimated return; + # - how to collect data from your environment efficiently and store them + # in a replay buffer; + # - how to use multi-step, a simple preprocessing step for off-policy algorithms; + # - and finally how to evaluate your model. + # + # **Prerequisites**: We encourage you to get familiar with torchrl through the + # `PPO tutorial `_ first. + # + # DQN + # --- + # + # DQN (`Deep Q-Learning `_) was + # the founding work in deep reinforcement learning. 
+ # + # On a high level, the algorithm is quite simple: Q-learning consists in + # learning a table of state-action values in such a way that, when + # encountering any particular state, we know which action to pick just by + # searching for the one with the highest value. This simple setting + # requires the actions and states to be + # discrete, otherwise a lookup table cannot be built. + # + # DQN uses a neural network that encodes a map from the state-action space to + # a value (scalar) space, which amortizes the cost of storing and exploring all + # the possible state-action combinations: if a state has not been seen in the + # past, we can still pass it in conjunction with the various actions available + # through our neural network and get an interpolated value for each of the + # actions available. + # + # We will solve the classic control problem of the cart pole. From the + # Gymnasium doc from where this environment is retrieved: + # + # | A pole is attached by an un-actuated joint to a cart, which moves along a + # | frictionless track. The pendulum is placed upright on the cart and the goal + # | is to balance the pole by applying forces in the left and right direction + # | on the cart. + # + # .. figure:: /_static/img/cartpole_demo.gif + # :alt: Cart Pole + # + # We do not aim at giving a SOTA implementation of the algorithm, but rather + # to provide a high-level illustration of TorchRL features in the context + # of this algorithm. + + # sphinx_gallery_start_ignore + import tempfile + import warnings + + warnings.filterwarnings("ignore") + # sphinx_gallery_end_ignore + + import os + import uuid + + import torch + from torch import nn + from torchrl.collectors import MultiaSyncDataCollector + from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer + from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter + from torchrl.envs.libs.gym import GymEnv + from torchrl.envs.transforms import ( + CatFrames, + Compose, + GrayScale, + ObservationNorm, + Resize, + ToTensorImage, + TransformedEnv, + ) + from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor + + from torchrl.objectives import DQNLoss, SoftUpdate + from torchrl.record.loggers.csv import CSVLogger + from torchrl.trainers import ( + LogReward, + Recorder, + ReplayBufferTrainer, + Trainer, + UpdateWeights, + ) + + + def is_notebook() -> bool: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return False # Terminal running IPython + else: + return False # Other type (?) + except NameError: + return False # Probably standard Python interpreter + + + ############################################################################### + # Let's get started with the various pieces we need for our algorithm: + # + # - An environment; + # - A policy (and related modules that we group under the "model" umbrella); + # - A data collector, which makes the policy play in the environment and + # delivers training data; + # - A replay buffer to store the training data; + # - A loss module, which computes the objective function to train our policy + # to maximise the return; + # - An optimizer, which performs parameter updates based on our loss. + # + # Additional modules include a logger, a recorder (executes the policy in + # "eval" mode) and a target network updater. 
With all these components into + # place, it is easy to see how one could misplace or misuse one component in + # the training script. The trainer is there to orchestrate everything for you! + # + # Building the environment + # ------------------------ + # + # First let's write a helper function that will output an environment. As usual, + # the "raw" environment may be too simple to be used in practice and we'll need + # some data transformation to expose its output to the policy. + # + # We will be using five transforms: + # + # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; + # - :class:`torchrl.envs.transforms.ToTensorImage` will convert a ``[W, H, C]`` uint8 + # tensor in a floating point tensor in the ``[0, 1]`` space with shape + # ``[C, W, H]``; + # - :class:`torchrl.envs.transforms.RewardScaling` to reduce the scale of the return; + # - :class:`torchrl.envs.transforms.GrayScale` will turn our image into grayscale; + # - :class:`torchrl.envs.transforms.Resize` will resize the image in a 64x64 format; + # - :class:`torchrl.envs.transforms.CatFrames` will concatenate an arbitrary number of + # successive frames (``N=4``) in a single tensor along the channel dimension. + # This is useful as a single image does not carry information about the + # motion of the cartpole. Some memory about past observations and actions + # is needed, either via a recurrent neural network or using a stack of + # frames. + # - :class:`torchrl.envs.transforms.ObservationNorm` which will normalize our observations + # given some custom summary statistics. + # + # In practice, our environment builder has two arguments: + # + # - ``parallel``: determines whether multiple environments have to be run in + # parallel. We stack the transforms after the + # :class:`torchrl.envs.ParallelEnv` to take advantage + # of vectorization of the operations on device, although this would + # technically work with every single environment attached to its own set of + # transforms. + # - ``obs_norm_sd`` will contain the normalizing constants for + # the :class:`torchrl.envs.ObservationNorm` transform. + # + + + def make_env( + parallel=False, + obs_norm_sd=None, + ): + if obs_norm_sd is None: + obs_norm_sd = {"standard_normal": True} + if parallel: + base_env = ParallelEnv( + num_workers, + EnvCreator( + lambda: GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, + ) + ), + ) else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter - - -############################################################################### -# Let's get started with the various pieces we need for our algorithm: -# -# - An environment; -# - A policy (and related modules that we group under the "model" umbrella); -# - A data collector, which makes the policy play in the environment and -# delivers training data; -# - A replay buffer to store the training data; -# - A loss module, which computes the objective function to train our policy -# to maximise the return; -# - An optimizer, which performs parameter updates based on our loss. -# -# Additional modules include a logger, a recorder (executes the policy in -# "eval" mode) and a target network updater. With all these components into -# place, it is easy to see how one could misplace or misuse one component in -# the training script. The trainer is there to orchestrate everything for you! 
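###############################################################################
# The sketch below is a deliberately simplified picture of the nested loop the
# trainer runs for us (the real :class:`torchrl.trainers.Trainer` also executes
# the registered hooks at each stage, which is not shown here); it only serves
# to show where the collector, buffer, loss module and optimizer fit together.
# All arguments are placeholders:

def simplified_training_loop(collector, replay_buffer, loss_module, optimizer, n_optim):
    # outer loop: data collection
    for batch in collector:
        replay_buffer.extend(batch.reshape(-1))
        # inner loop: consume the collected (or replayed) data
        for _ in range(n_optim):
            sample = replay_buffer.sample()
            losses = loss_module(sample)
            # the loss module returns a TensorDict; sum the entries whose key
            # starts with "loss"
            loss = sum(value for key, value in losses.items() if key.startswith("loss"))
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # sync the collector's copy of the policy weights
        collector.update_policy_weights_()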
-# -# Building the environment -# ------------------------ -# -# First let's write a helper function that will output an environment. As usual, -# the "raw" environment may be too simple to be used in practice and we'll need -# some data transformation to expose its output to the policy. -# -# We will be using five transforms: -# -# - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; -# - :class:`torchrl.envs.transforms.ToTensorImage` will convert a ``[W, H, C]`` uint8 -# tensor in a floating point tensor in the ``[0, 1]`` space with shape -# ``[C, W, H]``; -# - :class:`torchrl.envs.transforms.RewardScaling` to reduce the scale of the return; -# - :class:`torchrl.envs.transforms.GrayScale` will turn our image into grayscale; -# - :class:`torchrl.envs.transforms.Resize` will resize the image in a 64x64 format; -# - :class:`torchrl.envs.transforms.CatFrames` will concatenate an arbitrary number of -# successive frames (``N=4``) in a single tensor along the channel dimension. -# This is useful as a single image does not carry information about the -# motion of the cartpole. Some memory about past observations and actions -# is needed, either via a recurrent neural network or using a stack of -# frames. -# - :class:`torchrl.envs.transforms.ObservationNorm` which will normalize our observations -# given some custom summary statistics. -# -# In practice, our environment builder has two arguments: -# -# - ``parallel``: determines whether multiple environments have to be run in -# parallel. We stack the transforms after the -# :class:`torchrl.envs.ParallelEnv` to take advantage -# of vectorization of the operations on device, although this would -# technically work with every single environment attached to its own set of -# transforms. -# - ``obs_norm_sd`` will contain the normalizing constants for -# the :class:`torchrl.envs.ObservationNorm` transform. -# - - -def make_env( - parallel=False, - obs_norm_sd=None, -): - if obs_norm_sd is None: - obs_norm_sd = {"standard_normal": True} - if parallel: - base_env = ParallelEnv( - num_workers, - EnvCreator( - lambda: GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, - ) + base_env = GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, + ) + + env = TransformedEnv( + base_env, + Compose( + StepCounter(), # to count the steps of each trajectory + ToTensorImage(), + RewardScaling(loc=0.0, scale=0.1), + GrayScale(), + Resize(64, 64), + CatFrames(4, in_keys=["pixels"], dim=-3), + ObservationNorm(in_keys=["pixels"], **obs_norm_sd), ), ) - else: - base_env = GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, + return env + + + ############################################################################### + # Compute normalizing constants + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # To normalize images, we don't want to normalize each pixel independently + # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` + # shaped set of normalizing constants (loc and scale parameters). 
+ # We will be using the ``reduce_dim`` argument + # of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which + # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that + # not all dimensions disappear in the process: + # + + + def get_norm_stats(): + test_env = make_env() + test_env.transform[-1].init_stats( + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) + ) + obs_norm_sd = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). + print("state dict of the observation norm:", obs_norm_sd) + return obs_norm_sd + + + ############################################################################### + # Building the model (Deep Q-network) + # ----------------------------------- + # + # The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` + # object which is a simple CNN followed by a two-layer MLP. The only trick used + # here is that the action values (i.e. left and right action value) are + # computed using + # + # .. math:: + # + # \mathbb{v} = b(obs) + v(obs) - \mathbb{E}[v(obs)] + # + # where :math:`\mathbb{v}` is our vector of action values, + # :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a + # :math:`\mathbb{R}^n \rightarrow \mathbb{R}^m` function, for + # :math:`n = \# obs` and :math:`m = \# actions`. + # + # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, + # which will read the state-action + # values, pick up the one with the maximum value and write all those results + # in the input :class:`tensordict.TensorDict`. + # + + + def make_model(dummy_env): + cnn_kwargs = { + "num_cells": [32, 64, 64], + "kernel_sizes": [6, 4, 3], + "strides": [2, 2, 1], + "activation_class": nn.ELU, + # This can be used to reduce the size of the last layer of the CNN + # "squeeze_output": True, + # "aggregator_class": nn.AdaptiveAvgPool2d, + # "aggregator_kwargs": {"output_size": (1, 1)}, + } + mlp_kwargs = { + "depth": 2, + "num_cells": [ + 64, + 64, + ], + "activation_class": nn.ELU, + } + net = DuelingCnnDQNet( + dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs + ).to(device) + net.value[-1].bias.data.fill_(init_bias) + + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) + # init actor: because the model is composed of lazy conv/linear layers, + # we must pass a fake batch of data through it to instantiate them. + tensordict = dummy_env.fake_tensordict() + actor(tensordict) + + # we wrap our actor in an EGreedyWrapper for data collection + actor_explore = EGreedyWrapper( + actor, + annealing_num_steps=total_frames, + eps_init=eps_greedy_val, + eps_end=eps_greedy_val_env, ) - env = TransformedEnv( - base_env, - Compose( - StepCounter(), # to count the steps of each trajectory - ToTensorImage(), - RewardScaling(loc=0.0, scale=0.1), - GrayScale(), - Resize(64, 64), - CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **obs_norm_sd), - ), - ) - return env - - -############################################################################### -# Compute normalizing constants -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# To normalize images, we don't want to normalize each pixel independently -# with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` -# shaped set of normalizing constants (loc and scale parameters). 
-# We will be using the ``reduce_dim`` argument -# of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which -# dimensions must be reduced, and the ``keep_dims`` parameter to ensure that -# not all dimensions disappear in the process: -# - - -def get_norm_stats(): - test_env = make_env() - test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) + return actor, actor_explore + + + ############################################################################### + # Collecting and storing data + # --------------------------- + # + # Replay buffers + # ~~~~~~~~~~~~~~ + # + # Replay buffers play a central role in off-policy RL algorithms such as DQN. + # They constitute the dataset we will be sampling from during training. + # + # Here, we will use a regular sampling strategy, although a prioritized RB + # could improve the performance significantly. + # + # We place the storage on disk using + # :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This + # storage is created in a lazy manner: it will only be instantiated once the + # first batch of data is passed to it. + # + # The only requirement of this storage is that the data passed to it at write + # time must always have the same shape. + + + def get_replay_buffer(buffer_size, n_optim, batch_size): + replay_buffer = TensorDictReplayBuffer( + batch_size=batch_size, + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, + ) + return replay_buffer + + + ############################################################################### + # Data collector + # ~~~~~~~~~~~~~~ + # + # As in `PPO `_ and + # `DDPG `_, we will be using + # a data collector as a dataloader in the outer loop. + # + # We choose the following configuration: we will be running a series of + # parallel environments synchronously in parallel in different collectors, + # themselves running in parallel but asynchronously. + # The advantage of this configuration is that we can balance the amount of + # compute that is executed in batch with what we want to be executed + # asynchronously. We encourage the reader to experiment how the collection + # speed is impacted by modifying the number of collectors (ie the number of + # environment constructors passed to the collector) and the number of + # environment executed in parallel in each collector (controlled by the + # ``num_workers`` hyperparameter). + # + # When building the collector, we can choose on which device we want the + # environment and policy to execute the operations through the ``device`` + # keyword argument. The ``storing_devices`` argument will modify the + # location of the data being collected: if the batches that we are gathering + # have a considerable size, we may want to store them on a different location + # than the device where the computation is happening. For asynchronous data + # collectors such as ours, different storing devices mean that the data that + # we collect won't sit on the same device each time, which is something that + # out training loop must account for. For simplicity, we set the devices to + # the same value for all sub-collectors. 
+ + + def get_collector( + obs_norm_sd, + num_collectors, + actor_explore, + frames_per_batch, + total_frames, + device, + ): + data_collector = MultiaSyncDataCollector( + [ + make_env(parallel=True, obs_norm_sd=obs_norm_sd), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, + postproc=MultiStep(gamma=gamma, n_steps=5), + ) + return data_collector + + + ############################################################################### + # Loss function + # ------------- + # + # Building our loss function is straightforward: we only need to provide + # the model and a bunch of hyperparameters to the DQNLoss class. + # + # Target parameters + # ~~~~~~~~~~~~~~~~~ + # + # Many off-policy RL algorithms use the concept of "target parameters" when it + # comes to estimate the value of the next state or state-action pair. + # The target parameters are lagged copies of the model parameters. Because + # their predictions mismatch those of the current model configuration, they + # help learning by putting a pessimistic bound on the value being estimated. + # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous + # in similar algorithms. + # + + + def get_loss_module(actor, gamma): + loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) + target_updater = SoftUpdate(loss_module) + return loss_module, target_updater + + + ############################################################################### + # Hyperparameters + # --------------- + # + # Let's start with our hyperparameters. The following setting should work well + # in practice, and the performance of the algorithm should hopefully not be + # too sensitive to slight variations of these. + + device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" + + ############################################################################### + # Optimizer + # ~~~~~~~~~ + + # the learning rate of the optimizer + lr = 2e-3 + # weight decay + wd = 1e-5 + # the beta parameters of Adam + betas = (0.9, 0.999) + # Optimization steps per batch collected (aka UPD or updates per data) + n_optim = 8 + + ############################################################################### + # DQN parameters + # ~~~~~~~~~~~~~~ + # gamma decay factor + gamma = 0.99 + + ############################################################################### + # Smooth target network update decay parameter. + # This loosely corresponds to a 1/tau interval with hard target network + # update + tau = 0.02 + + ############################################################################### + # Data collection and replay buffer + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # .. note:: + # Values to be used for proper training have been commented. + # + # Total frames collected in the environment. In other implementations, the + # user defines a maximum number of episodes. + # This is harder to do with our data collectors since they return batches + # of N collected frames, where N is a constant. + # However, one can easily get the same restriction on number of episodes by + # breaking the training loop when a certain number + # episodes has been collected. 
+ total_frames = 10_000 # 500000 + + ############################################################################### + # Random frames used to initialize the replay buffer. + init_random_frames = 100 # 1000 + + ############################################################################### + # Frames in each batch collected. + frames_per_batch = 32 # 128 + + ############################################################################### + # Frames sampled from the replay buffer at each optimization step + batch_size = 32 # 256 + + ############################################################################### + # Size of the replay buffer in terms of frames + buffer_size = min(total_frames, 100000) + + ############################################################################### + # Number of environments run in parallel in each data collector + num_workers = 2 # 8 + num_collectors = 2 # 4 + + ############################################################################### + # Environment and exploration + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + # We set the initial and final value of the epsilon factor in Epsilon-greedy + # exploration. + # Since our policy is deterministic, exploration is crucial: without it, the + # only source of randomness would be the environment reset. + + eps_greedy_val = 0.1 + eps_greedy_val_env = 0.005 + + ############################################################################### + # To speed up learning, we set the bias of the last layer of our value network + # to a predefined value (this is not mandatory) + init_bias = 2.0 + + ############################################################################### + # .. note:: + # For fast rendering of the tutorial ``total_frames`` hyperparameter + # was set to a very low number. To get a reasonable performance, use a greater + # value e.g. 500000 + # + + ############################################################################### + # Building a Trainer + # ------------------ + # + # TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the + # following keyword-only arguments: + # + # - ``collector`` + # - ``loss_module`` + # - ``optimizer`` + # - ``logger``: A logger can be + # - ``total_frames``: this parameter defines the lifespan of the trainer. + # - ``frame_skip``: when a frame-skip is used, the collector must be made + # aware of it in order to accurately count the number of frames + # collected etc. Making the trainer aware of this parameter is not + # mandatory but helps to have a fairer comparison between settings where + # the total number of frames (budget) is fixed but the frame-skip is + # variable. + + stats = get_norm_stats() + test_env = make_env(parallel=False, obs_norm_sd=stats) + # Get model + actor, actor_explore = make_model(test_env) + loss_module, target_net_updater = get_loss_module(actor, gamma) + target_net_updater.init_() + + collector = get_collector( + stats, num_collectors, actor_explore, frames_per_batch, total_frames, device ) - obs_norm_sd = test_env.transform[-1].state_dict() - # let's check that normalizing constants have a size of ``[C, 1, 1]`` where - # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). 
- print("state dict of the observation norm:", obs_norm_sd) - return obs_norm_sd - - -############################################################################### -# Building the model (Deep Q-network) -# ----------------------------------- -# -# The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` -# object which is a simple CNN followed by a two-layer MLP. The only trick used -# here is that the action values (i.e. left and right action value) are -# computed using -# -# .. math:: -# -# \mathbb{v} = b(obs) + v(obs) - \mathbb{E}[v(obs)] -# -# where :math:`\mathbb{v}` is our vector of action values, -# :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a -# :math:`\mathbb{R}^n \rightarrow \mathbb{R}^m` function, for -# :math:`n = \# obs` and :math:`m = \# actions`. -# -# Our network is wrapped in a :class:`torchrl.modules.QValueActor`, -# which will read the state-action -# values, pick up the one with the maximum value and write all those results -# in the input :class:`tensordict.TensorDict`. -# - - -def make_model(dummy_env): - cnn_kwargs = { - "num_cells": [32, 64, 64], - "kernel_sizes": [6, 4, 3], - "strides": [2, 2, 1], - "activation_class": nn.ELU, - # This can be used to reduce the size of the last layer of the CNN - # "squeeze_output": True, - # "aggregator_class": nn.AdaptiveAvgPool2d, - # "aggregator_kwargs": {"output_size": (1, 1)}, - } - mlp_kwargs = { - "depth": 2, - "num_cells": [ - 64, - 64, - ], - "activation_class": nn.ELU, - } - net = DuelingCnnDQNet( - dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs - ).to(device) - net.value[-1].bias.data.fill_(init_bias) - - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) - # init actor: because the model is composed of lazy conv/linear layers, - # we must pass a fake batch of data through it to instantiate them. - tensordict = dummy_env.fake_tensordict() - actor(tensordict) - - # we wrap our actor in an EGreedyWrapper for data collection - actor_explore = EGreedyWrapper( - actor, - annealing_num_steps=total_frames, - eps_init=eps_greedy_val, - eps_end=eps_greedy_val_env, + optimizer = torch.optim.Adam( + loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas ) + exp_name = f"dqn_exp_{uuid.uuid1()}" + tmpdir = tempfile.TemporaryDirectory() + logger = CSVLogger(exp_name=exp_name, log_dir=tmpdir.name) + warnings.warn(f"log dir: {logger.experiment.log_dir}") - return actor, actor_explore - - -############################################################################### -# Collecting and storing data -# --------------------------- -# -# Replay buffers -# ~~~~~~~~~~~~~~ -# -# Replay buffers play a central role in off-policy RL algorithms such as DQN. -# They constitute the dataset we will be sampling from during training. -# -# Here, we will use a regular sampling strategy, although a prioritized RB -# could improve the performance significantly. -# -# We place the storage on disk using -# :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This -# storage is created in a lazy manner: it will only be instantiated once the -# first batch of data is passed to it. -# -# The only requirement of this storage is that the data passed to it at write -# time must always have the same shape. 
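###############################################################################
# As a small usage sketch (with made-up data; in this tutorial the trainer will
# do this for us through its hooks), the buffer is written to and sampled from
# with TensorDicts of a fixed structure:

import torch
from tensordict import TensorDict
from torchrl.data import LazyMemmapStorage, TensorDictReplayBuffer

rb = TensorDictReplayBuffer(batch_size=8, storage=LazyMemmapStorage(1000))
fake_transitions = TensorDict(
    {"observation": torch.randn(16, 4), "action": torch.randint(2, (16, 1))},
    batch_size=[16],
)
rb.extend(fake_transitions)  # the memmap storage is instantiated on this first write
sample = rb.sample()         # a TensorDict with batch_size [8]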
- - -def get_replay_buffer(buffer_size, n_optim, batch_size): - replay_buffer = TensorDictReplayBuffer( - batch_size=batch_size, - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, - ) - return replay_buffer - - -############################################################################### -# Data collector -# ~~~~~~~~~~~~~~ -# -# As in `PPO `_ and -# `DDPG `_, we will be using -# a data collector as a dataloader in the outer loop. -# -# We choose the following configuration: we will be running a series of -# parallel environments synchronously in parallel in different collectors, -# themselves running in parallel but asynchronously. -# The advantage of this configuration is that we can balance the amount of -# compute that is executed in batch with what we want to be executed -# asynchronously. We encourage the reader to experiment how the collection -# speed is impacted by modifying the number of collectors (ie the number of -# environment constructors passed to the collector) and the number of -# environment executed in parallel in each collector (controlled by the -# ``num_workers`` hyperparameter). -# -# When building the collector, we can choose on which device we want the -# environment and policy to execute the operations through the ``device`` -# keyword argument. The ``storing_devices`` argument will modify the -# location of the data being collected: if the batches that we are gathering -# have a considerable size, we may want to store them on a different location -# than the device where the computation is happening. For asynchronous data -# collectors such as ours, different storing devices mean that the data that -# we collect won't sit on the same device each time, which is something that -# out training loop must account for. For simplicity, we set the devices to -# the same value for all sub-collectors. - - -def get_collector( - obs_norm_sd, - num_collectors, - actor_explore, - frames_per_batch, - total_frames, - device, -): - data_collector = MultiaSyncDataCollector( - [ - make_env(parallel=True, obs_norm_sd=obs_norm_sd), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, + ############################################################################### + # We can control how often the scalars should be logged. Here we set this + # to a low value as our training loop is short: + + log_interval = 500 + + trainer = Trainer( + collector=collector, total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. Below is an example of - # heterogeneous devices - device=device, - storing_device=device, - split_trajs=False, - postproc=MultiStep(gamma=gamma, n_steps=5), + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=logger, + optim_steps_per_batch=n_optim, + log_interval=log_interval, ) - return data_collector - - -############################################################################### -# Loss function -# ------------- -# -# Building our loss function is straightforward: we only need to provide -# the model and a bunch of hyperparameters to the DQNLoss class. -# -# Target parameters -# ~~~~~~~~~~~~~~~~~ -# -# Many off-policy RL algorithms use the concept of "target parameters" when it -# comes to estimate the value of the next state or state-action pair. -# The target parameters are lagged copies of the model parameters. 
Because -# their predictions mismatch those of the current model configuration, they -# help learning by putting a pessimistic bound on the value being estimated. -# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous -# in similar algorithms. -# - - -def get_loss_module(actor, gamma): - loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) - target_updater = SoftUpdate(loss_module) - return loss_module, target_updater - - -############################################################################### -# Hyperparameters -# --------------- -# -# Let's start with our hyperparameters. The following setting should work well -# in practice, and the performance of the algorithm should hopefully not be -# too sensitive to slight variations of these. - -device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - -############################################################################### -# Optimizer -# ~~~~~~~~~ - -# the learning rate of the optimizer -lr = 2e-3 -# weight decay -wd = 1e-5 -# the beta parameters of Adam -betas = (0.9, 0.999) -# Optimization steps per batch collected (aka UPD or updates per data) -n_optim = 8 - -############################################################################### -# DQN parameters -# ~~~~~~~~~~~~~~ -# gamma decay factor -gamma = 0.99 - -############################################################################### -# Smooth target network update decay parameter. -# This loosely corresponds to a 1/tau interval with hard target network -# update -tau = 0.02 - -############################################################################### -# Data collection and replay buffer -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# .. note:: -# Values to be used for proper training have been commented. -# -# Total frames collected in the environment. In other implementations, the -# user defines a maximum number of episodes. -# This is harder to do with our data collectors since they return batches -# of N collected frames, where N is a constant. -# However, one can easily get the same restriction on number of episodes by -# breaking the training loop when a certain number -# episodes has been collected. -total_frames = 10_000 # 500000 - -############################################################################### -# Random frames used to initialize the replay buffer. -init_random_frames = 100 # 1000 - -############################################################################### -# Frames in each batch collected. -frames_per_batch = 32 # 128 - -############################################################################### -# Frames sampled from the replay buffer at each optimization step -batch_size = 32 # 256 - -############################################################################### -# Size of the replay buffer in terms of frames -buffer_size = min(total_frames, 100000) - -############################################################################### -# Number of environments run in parallel in each data collector -num_workers = 2 # 8 -num_collectors = 2 # 4 - -############################################################################### -# Environment and exploration -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# We set the initial and final value of the epsilon factor in Epsilon-greedy -# exploration. -# Since our policy is deterministic, exploration is crucial: without it, the -# only source of randomness would be the environment reset. 
- -eps_greedy_val = 0.1 -eps_greedy_val_env = 0.005 - -############################################################################### -# To speed up learning, we set the bias of the last layer of our value network -# to a predefined value (this is not mandatory) -init_bias = 2.0 - -############################################################################### -# .. note:: -# For fast rendering of the tutorial ``total_frames`` hyperparameter -# was set to a very low number. To get a reasonable performance, use a greater -# value e.g. 500000 -# - -############################################################################### -# Building a Trainer -# ------------------ -# -# TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the -# following keyword-only arguments: -# -# - ``collector`` -# - ``loss_module`` -# - ``optimizer`` -# - ``logger``: A logger can be -# - ``total_frames``: this parameter defines the lifespan of the trainer. -# - ``frame_skip``: when a frame-skip is used, the collector must be made -# aware of it in order to accurately count the number of frames -# collected etc. Making the trainer aware of this parameter is not -# mandatory but helps to have a fairer comparison between settings where -# the total number of frames (budget) is fixed but the frame-skip is -# variable. - -stats = get_norm_stats() -test_env = make_env(parallel=False, obs_norm_sd=stats) -# Get model -actor, actor_explore = make_model(test_env) -loss_module, target_net_updater = get_loss_module(actor, gamma) -target_net_updater.init_() - -collector = get_collector( - stats, num_collectors, actor_explore, frames_per_batch, total_frames, device -) -optimizer = torch.optim.Adam( - loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas -) -exp_name = f"dqn_exp_{uuid.uuid1()}" -tmpdir = tempfile.TemporaryDirectory() -logger = CSVLogger(exp_name=exp_name, log_dir=tmpdir.name) -warnings.warn(f"log dir: {logger.experiment.log_dir}") - -############################################################################### -# We can control how often the scalars should be logged. Here we set this -# to a low value as our training loop is short: - -log_interval = 500 - -trainer = Trainer( - collector=collector, - total_frames=total_frames, - frame_skip=1, - loss_module=loss_module, - optimizer=optimizer, - logger=logger, - optim_steps_per_batch=n_optim, - log_interval=log_interval, -) - -############################################################################### -# Registering hooks -# ~~~~~~~~~~~~~~~~~ -# -# Registering hooks can be achieved in two separate ways: -# -# - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` -# method is the first choice. One just needs to provide the trainer as input -# and the hook will be registered with a default name at a default location. -# For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` -# requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which -# can be cumbersome to implement. 
-buffer_hook = ReplayBufferTrainer( - get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), - flatten_tensordicts=True, -) -buffer_hook.register(trainer) -weight_updater = UpdateWeights(collector, update_weights_interval=1) -weight_updater.register(trainer) -recorder = Recorder( - record_interval=100, # log every 100 optimization steps - record_frames=1000, # maximum number of frames in the record - frame_skip=1, - policy_exploration=actor_explore, - environment=test_env, - exploration_mode="mode", - log_keys=[("next", "reward")], - out_keys={("next", "reward"): "rewards"}, - log_pbar=True, -) -recorder.register(trainer) - -############################################################################### -# - Any callable (including :class:`torchrl.trainers.TrainerHookBase` -# subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. -# In this case, a location must be explicitly passed (). This method gives -# more control over the location of the hook but it also requires more -# understanding of the Trainer mechanism. -# Check the `trainer documentation `_ -# for a detailed description of the trainer hooks. -# -trainer.register_op("post_optim", target_net_updater.step) - -############################################################################### -# We can log the training rewards too. Note that this is of limited interest -# with CartPole, as rewards are always 1. The discounted sum of rewards is -# maximised not by getting higher rewards but by keeping the cart-pole alive -# for longer. -# This will be reflected by the `total_rewards` value displayed in the -# progress bar. -# -log_reward = LogReward(log_pbar=True) -log_reward.register(trainer) - -############################################################################### -# .. note:: -# It is possible to link multiple optimizers to the trainer if needed. -# In this case, each optimizer will be tied to a field in the loss -# dictionary. -# Check the :class:`torchrl.trainers.OptimizerHook` to learn more. -# -# Here we are, ready to train our algorithm! A simple call to -# ``trainer.train()`` and we'll be getting our results logged in. -# -trainer.train() - -############################################################################### -# We can now quickly check the CSVs with the results. - - -def print_csv_files_in_folder(folder_path): - """ - Find all CSV files in a folder and return the first 10 lines of each file as a string. - - Args: - folder_path (str): The relative path to the folder. - Returns: - str: A string containing the first 10 lines of each CSV file in the folder. 
- """ - csv_files = [] - output_str = "" - for file in os.listdir(folder_path): - if file.endswith(".csv"): - csv_files.append(os.path.join(folder_path, file)) - for csv_file in csv_files: - output_str += f"File: {csv_file}\n" - with open(csv_file, "r") as f: - for i, line in enumerate(f): - if i == 10: - break - output_str += line.strip() + "\n" - output_str += "\n" - return output_str - - -print_csv_files_in_folder(logger.experiment.log_dir) - -############################################################################### -# Conclusion and possible improvements -# ------------------------------------ -# -# In this tutorial we have learned: -# -# - How to write a Trainer, including building its components and registering -# them in the trainer; -# - How to code a DQN algorithm, including how to create a policy that picks -# up the action with the highest value with -# :class:`torchrl.modules.QValueNetwork`; -# - How to build a multiprocessed data collector; -# -# Possible improvements to this tutorial could include: -# -# - A prioritized replay buffer could also be used. This will give a -# higher priority to samples that have the worst value accuracy. -# Learn more on the -# `replay buffer section `_ -# of the documentation. -# - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` -# for more information). -# - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. + ############################################################################### + # Registering hooks + # ~~~~~~~~~~~~~~~~~ + # + # Registering hooks can be achieved in two separate ways: + # + # - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` + # method is the first choice. One just needs to provide the trainer as input + # and the hook will be registered with a default name at a default location. + # For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` + # requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which + # can be cumbersome to implement. + buffer_hook = ReplayBufferTrainer( + get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), + flatten_tensordicts=True, + ) + buffer_hook.register(trainer) + weight_updater = UpdateWeights(collector, update_weights_interval=1) + weight_updater.register(trainer) + recorder = Recorder( + record_interval=100, # log every 100 optimization steps + record_frames=1000, # maximum number of frames in the record + frame_skip=1, + policy_exploration=actor_explore, + environment=test_env, + exploration_mode="mode", + log_keys=[("next", "reward")], + out_keys={("next", "reward"): "rewards"}, + log_pbar=True, + ) + recorder.register(trainer) + + ############################################################################### + # - Any callable (including :class:`torchrl.trainers.TrainerHookBase` + # subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. + # In this case, a location must be explicitly passed (). This method gives + # more control over the location of the hook but it also requires more + # understanding of the Trainer mechanism. + # Check the `trainer documentation `_ + # for a detailed description of the trainer hooks. + # + trainer.register_op("post_optim", target_net_updater.step) + + ############################################################################### + # We can log the training rewards too. Note that this is of limited interest + # with CartPole, as rewards are always 1. 
The discounted sum of rewards is + # maximised not by getting higher rewards but by keeping the cart-pole alive + # for longer. + # This will be reflected by the `total_rewards` value displayed in the + # progress bar. + # + log_reward = LogReward(log_pbar=True) + log_reward.register(trainer) + + ############################################################################### + # .. note:: + # It is possible to link multiple optimizers to the trainer if needed. + # In this case, each optimizer will be tied to a field in the loss + # dictionary. + # Check the :class:`torchrl.trainers.OptimizerHook` to learn more. + # + # Here we are, ready to train our algorithm! A simple call to + # ``trainer.train()`` and we'll be getting our results logged in. + # + trainer.train() + + ############################################################################### + # We can now quickly check the CSVs with the results. + + + def print_csv_files_in_folder(folder_path): + """ + Find all CSV files in a folder and return the first 10 lines of each file as a string. + + Args: + folder_path (str): The relative path to the folder. + + Returns: + str: A string containing the first 10 lines of each CSV file in the folder. + """ + csv_files = [] + output_str = "" + for file in os.listdir(folder_path): + if file.endswith(".csv"): + csv_files.append(os.path.join(folder_path, file)) + for csv_file in csv_files: + output_str += f"File: {csv_file}\n" + with open(csv_file, "r") as f: + for i, line in enumerate(f): + if i == 10: + break + output_str += line.strip() + "\n" + output_str += "\n" + return output_str + + + print_csv_files_in_folder(logger.experiment.log_dir) + + ############################################################################### + # Conclusion and possible improvements + # ------------------------------------ + # + # In this tutorial we have learned: + # + # - How to write a Trainer, including building its components and registering + # them in the trainer; + # - How to code a DQN algorithm, including how to create a policy that picks + # up the action with the highest value with + # :class:`torchrl.modules.QValueNetwork`; + # - How to build a multiprocessed data collector; + # + # Possible improvements to this tutorial could include: + # + # - A prioritized replay buffer could also be used. This will give a + # higher priority to samples that have the worst value accuracy. + # Learn more on the + # `replay buffer section `_ + # of the documentation. + # - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` + # for more information). + # - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. 
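###############################################################################
# As a hedged sketch of the first suggestion above (reusing the same order of
# magnitude as the ``buffer_size`` and ``batch_size`` hyperparameters of the
# tutorial, and assuming the default ``"td_error"`` priority entry that the DQN
# loss can populate), the regular buffer could be swapped for a prioritized one:

from torchrl.data import LazyMemmapStorage, TensorDictPrioritizedReplayBuffer

prioritized_rb = TensorDictPrioritizedReplayBuffer(
    alpha=0.7,                # how strongly priorities skew the sampling
    beta=0.5,                 # importance-sampling correction exponent
    storage=LazyMemmapStorage(100_000),
    batch_size=32,
    priority_key="td_error",  # entry used to update the sample priorities
)
# Such a buffer should be usable in place of the regular one when building the
# :class:`torchrl.trainers.ReplayBufferTrainer` hook.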
From 89e7b1b156180ba1598ee475904fb9df189b2b1c Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 5 Apr 2023 16:44:58 +0100 Subject: [PATCH 85/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 1399 +++++++++++----------- 1 file changed, 699 insertions(+), 700 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 3d0eef5adc9..47268647e71 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -1,711 +1,710 @@ -if __name__ == "__main__": - # -*- coding: utf-8 -*- - """ - TorchRL trainer: A DQN example - ============================== - **Author**: `Vincent Moens `_ - - """ - - ############################################################################## - # TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle - # your training loop. The trainer executes a nested loop where the outer loop - # is the data collection and the inner loop consumes this data or some data - # retrieved from the replay buffer to train the model. - # At various points in this training loop, hooks can be attached and executed at - # given intervals. - # - # In this tutorial, we will be using the trainer class to train a DQN algorithm - # to solve the CartPole task from scratch. - # - # Main takeaways: - # - # - Building a trainer with its essential components: data collector, loss - # module, replay buffer and optimizer. - # - Adding hooks to a trainer, such as loggers, target network updaters and such. - # - # The trainer is fully customisable and offers a large set of functionalities. - # The tutorial is organised around its construction. - # We will be detailing how to build each of the components of the library first, - # and then put the pieces together using the :class:`torchrl.trainers.Trainer` - # class. - # - # Along the road, we will also focus on some other aspects of the library: - # - # - how to build an environment in TorchRL, including transforms (e.g. data - # normalization, frame concatenation, resizing and turning to grayscale) - # and parallel execution. Unlike what we did in the - # `DDPG tutorial `_, we - # will normalize the pixels and not the state vector. - # - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor - # that estimates the action values and picks up the action with the highest - # estimated return; - # - how to collect data from your environment efficiently and store them - # in a replay buffer; - # - how to use multi-step, a simple preprocessing step for off-policy algorithms; - # - and finally how to evaluate your model. - # - # **Prerequisites**: We encourage you to get familiar with torchrl through the - # `PPO tutorial `_ first. - # - # DQN - # --- - # - # DQN (`Deep Q-Learning `_) was - # the founding work in deep reinforcement learning. - # - # On a high level, the algorithm is quite simple: Q-learning consists in - # learning a table of state-action values in such a way that, when - # encountering any particular state, we know which action to pick just by - # searching for the one with the highest value. This simple setting - # requires the actions and states to be - # discrete, otherwise a lookup table cannot be built. 
- # - # DQN uses a neural network that encodes a map from the state-action space to - # a value (scalar) space, which amortizes the cost of storing and exploring all - # the possible state-action combinations: if a state has not been seen in the - # past, we can still pass it in conjunction with the various actions available - # through our neural network and get an interpolated value for each of the - # actions available. - # - # We will solve the classic control problem of the cart pole. From the - # Gymnasium doc from where this environment is retrieved: - # - # | A pole is attached by an un-actuated joint to a cart, which moves along a - # | frictionless track. The pendulum is placed upright on the cart and the goal - # | is to balance the pole by applying forces in the left and right direction - # | on the cart. - # - # .. figure:: /_static/img/cartpole_demo.gif - # :alt: Cart Pole - # - # We do not aim at giving a SOTA implementation of the algorithm, but rather - # to provide a high-level illustration of TorchRL features in the context - # of this algorithm. - - # sphinx_gallery_start_ignore - import tempfile - import warnings - - warnings.filterwarnings("ignore") - # sphinx_gallery_end_ignore - - import os - import uuid - - import torch - from torch import nn - from torchrl.collectors import MultiaSyncDataCollector - from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer - from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter - from torchrl.envs.libs.gym import GymEnv - from torchrl.envs.transforms import ( - CatFrames, - Compose, - GrayScale, - ObservationNorm, - Resize, - ToTensorImage, - TransformedEnv, - ) - from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor - - from torchrl.objectives import DQNLoss, SoftUpdate - from torchrl.record.loggers.csv import CSVLogger - from torchrl.trainers import ( - LogReward, - Recorder, - ReplayBufferTrainer, - Trainer, - UpdateWeights, - ) - - - def is_notebook() -> bool: - try: - shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython - else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter - - - ############################################################################### - # Let's get started with the various pieces we need for our algorithm: - # - # - An environment; - # - A policy (and related modules that we group under the "model" umbrella); - # - A data collector, which makes the policy play in the environment and - # delivers training data; - # - A replay buffer to store the training data; - # - A loss module, which computes the objective function to train our policy - # to maximise the return; - # - An optimizer, which performs parameter updates based on our loss. - # - # Additional modules include a logger, a recorder (executes the policy in - # "eval" mode) and a target network updater. With all these components into - # place, it is easy to see how one could misplace or misuse one component in - # the training script. The trainer is there to orchestrate everything for you! - # - # Building the environment - # ------------------------ - # - # First let's write a helper function that will output an environment. 
As usual, - # the "raw" environment may be too simple to be used in practice and we'll need - # some data transformation to expose its output to the policy. - # - # We will be using five transforms: - # - # - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; - # - :class:`torchrl.envs.transforms.ToTensorImage` will convert a ``[W, H, C]`` uint8 - # tensor in a floating point tensor in the ``[0, 1]`` space with shape - # ``[C, W, H]``; - # - :class:`torchrl.envs.transforms.RewardScaling` to reduce the scale of the return; - # - :class:`torchrl.envs.transforms.GrayScale` will turn our image into grayscale; - # - :class:`torchrl.envs.transforms.Resize` will resize the image in a 64x64 format; - # - :class:`torchrl.envs.transforms.CatFrames` will concatenate an arbitrary number of - # successive frames (``N=4``) in a single tensor along the channel dimension. - # This is useful as a single image does not carry information about the - # motion of the cartpole. Some memory about past observations and actions - # is needed, either via a recurrent neural network or using a stack of - # frames. - # - :class:`torchrl.envs.transforms.ObservationNorm` which will normalize our observations - # given some custom summary statistics. - # - # In practice, our environment builder has two arguments: - # - # - ``parallel``: determines whether multiple environments have to be run in - # parallel. We stack the transforms after the - # :class:`torchrl.envs.ParallelEnv` to take advantage - # of vectorization of the operations on device, although this would - # technically work with every single environment attached to its own set of - # transforms. - # - ``obs_norm_sd`` will contain the normalizing constants for - # the :class:`torchrl.envs.ObservationNorm` transform. - # - - - def make_env( - parallel=False, - obs_norm_sd=None, - ): - if obs_norm_sd is None: - obs_norm_sd = {"standard_normal": True} - if parallel: - base_env = ParallelEnv( - num_workers, - EnvCreator( - lambda: GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, - ) - ), - ) +# -*- coding: utf-8 -*- +""" +TorchRL trainer: A DQN example +============================== +**Author**: `Vincent Moens `_ + +""" + +############################################################################## +# TorchRL provides a generic :class:`torchrl.trainers.Trainer` class to handle +# your training loop. The trainer executes a nested loop where the outer loop +# is the data collection and the inner loop consumes this data or some data +# retrieved from the replay buffer to train the model. +# At various points in this training loop, hooks can be attached and executed at +# given intervals. +# +# In this tutorial, we will be using the trainer class to train a DQN algorithm +# to solve the CartPole task from scratch. +# +# Main takeaways: +# +# - Building a trainer with its essential components: data collector, loss +# module, replay buffer and optimizer. +# - Adding hooks to a trainer, such as loggers, target network updaters and such. +# +# The trainer is fully customisable and offers a large set of functionalities. +# The tutorial is organised around its construction. +# We will be detailing how to build each of the components of the library first, +# and then put the pieces together using the :class:`torchrl.trainers.Trainer` +# class. +# +# Along the road, we will also focus on some other aspects of the library: +# +# - how to build an environment in TorchRL, including transforms (e.g. 
data +# normalization, frame concatenation, resizing and turning to grayscale) +# and parallel execution. Unlike what we did in the +# `DDPG tutorial `_, we +# will normalize the pixels and not the state vector. +# - how to design a :class:`torchrl.modules.QValueActor` object, i.e. an actor +# that estimates the action values and picks up the action with the highest +# estimated return; +# - how to collect data from your environment efficiently and store them +# in a replay buffer; +# - how to use multi-step, a simple preprocessing step for off-policy algorithms; +# - and finally how to evaluate your model. +# +# **Prerequisites**: We encourage you to get familiar with torchrl through the +# `PPO tutorial `_ first. +# +# DQN +# --- +# +# DQN (`Deep Q-Learning `_) was +# the founding work in deep reinforcement learning. +# +# On a high level, the algorithm is quite simple: Q-learning consists in +# learning a table of state-action values in such a way that, when +# encountering any particular state, we know which action to pick just by +# searching for the one with the highest value. This simple setting +# requires the actions and states to be +# discrete, otherwise a lookup table cannot be built. +# +# DQN uses a neural network that encodes a map from the state-action space to +# a value (scalar) space, which amortizes the cost of storing and exploring all +# the possible state-action combinations: if a state has not been seen in the +# past, we can still pass it in conjunction with the various actions available +# through our neural network and get an interpolated value for each of the +# actions available. +# +# We will solve the classic control problem of the cart pole. From the +# Gymnasium doc from where this environment is retrieved: +# +# | A pole is attached by an un-actuated joint to a cart, which moves along a +# | frictionless track. The pendulum is placed upright on the cart and the goal +# | is to balance the pole by applying forces in the left and right direction +# | on the cart. +# +# .. figure:: /_static/img/cartpole_demo.gif +# :alt: Cart Pole +# +# We do not aim at giving a SOTA implementation of the algorithm, but rather +# to provide a high-level illustration of TorchRL features in the context +# of this algorithm. 
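+
+###############################################################################
+# To make the action-selection rule above concrete, here is a minimal,
+# self-contained sketch of the greedy step DQN relies on. The ``toy_q_net``
+# below is a stand-in used for illustration only, not the model built later
+# in this tutorial:
+#
+# .. code-block:: python
+#
+#     import torch
+#     from torch import nn
+#
+#     n_obs, n_actions = 4, 2
+#     toy_q_net = nn.Linear(n_obs, n_actions)  # one value per discrete action
+#     obs = torch.randn(n_obs)
+#     action_values = toy_q_net(obs)     # estimated return of each action
+#     action = action_values.argmax(-1)  # greedy choice: highest estimated value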
+ +# sphinx_gallery_start_ignore +import tempfile +import warnings + +warnings.filterwarnings("ignore") +# sphinx_gallery_end_ignore + +import os +import uuid + +import torch +from torch import nn +from torchrl.collectors import MultiaSyncDataCollector +from torchrl.data import LazyMemmapStorage, MultiStep, TensorDictReplayBuffer +from torchrl.envs import EnvCreator, ParallelEnv, RewardScaling, StepCounter +from torchrl.envs.libs.gym import GymEnv +from torchrl.envs.transforms import ( + CatFrames, + Compose, + GrayScale, + ObservationNorm, + Resize, + ToTensorImage, + TransformedEnv, +) +from torchrl.modules import DuelingCnnDQNet, EGreedyWrapper, QValueActor + +from torchrl.objectives import DQNLoss, SoftUpdate +from torchrl.record.loggers.csv import CSVLogger +from torchrl.trainers import ( + LogReward, + Recorder, + ReplayBufferTrainer, + Trainer, + UpdateWeights, +) + + +def is_notebook() -> bool: + try: + shell = get_ipython().__class__.__name__ + if shell == "ZMQInteractiveShell": + return True # Jupyter notebook or qtconsole + elif shell == "TerminalInteractiveShell": + return False # Terminal running IPython else: - base_env = GymEnv( - "CartPole-v1", - from_pixels=True, - pixels_only=True, - device=device, - ) - - env = TransformedEnv( - base_env, - Compose( - StepCounter(), # to count the steps of each trajectory - ToTensorImage(), - RewardScaling(loc=0.0, scale=0.1), - GrayScale(), - Resize(64, 64), - CatFrames(4, in_keys=["pixels"], dim=-3), - ObservationNorm(in_keys=["pixels"], **obs_norm_sd), + return False # Other type (?) + except NameError: + return False # Probably standard Python interpreter + + +############################################################################### +# Let's get started with the various pieces we need for our algorithm: +# +# - An environment; +# - A policy (and related modules that we group under the "model" umbrella); +# - A data collector, which makes the policy play in the environment and +# delivers training data; +# - A replay buffer to store the training data; +# - A loss module, which computes the objective function to train our policy +# to maximise the return; +# - An optimizer, which performs parameter updates based on our loss. +# +# Additional modules include a logger, a recorder (executes the policy in +# "eval" mode) and a target network updater. With all these components into +# place, it is easy to see how one could misplace or misuse one component in +# the training script. The trainer is there to orchestrate everything for you! +# +# Building the environment +# ------------------------ +# +# First let's write a helper function that will output an environment. As usual, +# the "raw" environment may be too simple to be used in practice and we'll need +# some data transformation to expose its output to the policy. 
+# +# We will be using five transforms: +# +# - :class:`torchrl.envs.StepCounter` to count the number of steps in each trajectory; +# - :class:`torchrl.envs.transforms.ToTensorImage` will convert a ``[W, H, C]`` uint8 +# tensor in a floating point tensor in the ``[0, 1]`` space with shape +# ``[C, W, H]``; +# - :class:`torchrl.envs.transforms.RewardScaling` to reduce the scale of the return; +# - :class:`torchrl.envs.transforms.GrayScale` will turn our image into grayscale; +# - :class:`torchrl.envs.transforms.Resize` will resize the image in a 64x64 format; +# - :class:`torchrl.envs.transforms.CatFrames` will concatenate an arbitrary number of +# successive frames (``N=4``) in a single tensor along the channel dimension. +# This is useful as a single image does not carry information about the +# motion of the cartpole. Some memory about past observations and actions +# is needed, either via a recurrent neural network or using a stack of +# frames. +# - :class:`torchrl.envs.transforms.ObservationNorm` which will normalize our observations +# given some custom summary statistics. +# +# In practice, our environment builder has two arguments: +# +# - ``parallel``: determines whether multiple environments have to be run in +# parallel. We stack the transforms after the +# :class:`torchrl.envs.ParallelEnv` to take advantage +# of vectorization of the operations on device, although this would +# technically work with every single environment attached to its own set of +# transforms. +# - ``obs_norm_sd`` will contain the normalizing constants for +# the :class:`torchrl.envs.ObservationNorm` transform. +# + + +def make_env( + parallel=False, + obs_norm_sd=None, +): + if obs_norm_sd is None: + obs_norm_sd = {"standard_normal": True} + if parallel: + base_env = ParallelEnv( + num_workers, + EnvCreator( + lambda: GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, + device=device, + ) ), ) - return env - - - ############################################################################### - # Compute normalizing constants - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # To normalize images, we don't want to normalize each pixel independently - # with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` - # shaped set of normalizing constants (loc and scale parameters). - # We will be using the ``reduce_dim`` argument - # of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which - # dimensions must be reduced, and the ``keep_dims`` parameter to ensure that - # not all dimensions disappear in the process: - # - - - def get_norm_stats(): - test_env = make_env() - test_env.transform[-1].init_stats( - num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) - ) - obs_norm_sd = test_env.transform[-1].state_dict() - # let's check that normalizing constants have a size of ``[C, 1, 1]`` where - # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). - print("state dict of the observation norm:", obs_norm_sd) - return obs_norm_sd - - - ############################################################################### - # Building the model (Deep Q-network) - # ----------------------------------- - # - # The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` - # object which is a simple CNN followed by a two-layer MLP. The only trick used - # here is that the action values (i.e. left and right action value) are - # computed using - # - # .. 
math:: - # - # \mathbb{v} = b(obs) + v(obs) - \mathbb{E}[v(obs)] - # - # where :math:`\mathbb{v}` is our vector of action values, - # :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a - # :math:`\mathbb{R}^n \rightarrow \mathbb{R}^m` function, for - # :math:`n = \# obs` and :math:`m = \# actions`. - # - # Our network is wrapped in a :class:`torchrl.modules.QValueActor`, - # which will read the state-action - # values, pick up the one with the maximum value and write all those results - # in the input :class:`tensordict.TensorDict`. - # - - - def make_model(dummy_env): - cnn_kwargs = { - "num_cells": [32, 64, 64], - "kernel_sizes": [6, 4, 3], - "strides": [2, 2, 1], - "activation_class": nn.ELU, - # This can be used to reduce the size of the last layer of the CNN - # "squeeze_output": True, - # "aggregator_class": nn.AdaptiveAvgPool2d, - # "aggregator_kwargs": {"output_size": (1, 1)}, - } - mlp_kwargs = { - "depth": 2, - "num_cells": [ - 64, - 64, - ], - "activation_class": nn.ELU, - } - net = DuelingCnnDQNet( - dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs - ).to(device) - net.value[-1].bias.data.fill_(init_bias) - - actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) - # init actor: because the model is composed of lazy conv/linear layers, - # we must pass a fake batch of data through it to instantiate them. - tensordict = dummy_env.fake_tensordict() - actor(tensordict) - - # we wrap our actor in an EGreedyWrapper for data collection - actor_explore = EGreedyWrapper( - actor, - annealing_num_steps=total_frames, - eps_init=eps_greedy_val, - eps_end=eps_greedy_val_env, - ) - - return actor, actor_explore - - - ############################################################################### - # Collecting and storing data - # --------------------------- - # - # Replay buffers - # ~~~~~~~~~~~~~~ - # - # Replay buffers play a central role in off-policy RL algorithms such as DQN. - # They constitute the dataset we will be sampling from during training. - # - # Here, we will use a regular sampling strategy, although a prioritized RB - # could improve the performance significantly. - # - # We place the storage on disk using - # :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. This - # storage is created in a lazy manner: it will only be instantiated once the - # first batch of data is passed to it. - # - # The only requirement of this storage is that the data passed to it at write - # time must always have the same shape. - - - def get_replay_buffer(buffer_size, n_optim, batch_size): - replay_buffer = TensorDictReplayBuffer( - batch_size=batch_size, - storage=LazyMemmapStorage(buffer_size), - prefetch=n_optim, - ) - return replay_buffer - - - ############################################################################### - # Data collector - # ~~~~~~~~~~~~~~ - # - # As in `PPO `_ and - # `DDPG `_, we will be using - # a data collector as a dataloader in the outer loop. - # - # We choose the following configuration: we will be running a series of - # parallel environments synchronously in parallel in different collectors, - # themselves running in parallel but asynchronously. - # The advantage of this configuration is that we can balance the amount of - # compute that is executed in batch with what we want to be executed - # asynchronously. 
We encourage the reader to experiment how the collection - # speed is impacted by modifying the number of collectors (ie the number of - # environment constructors passed to the collector) and the number of - # environment executed in parallel in each collector (controlled by the - # ``num_workers`` hyperparameter). - # - # When building the collector, we can choose on which device we want the - # environment and policy to execute the operations through the ``device`` - # keyword argument. The ``storing_devices`` argument will modify the - # location of the data being collected: if the batches that we are gathering - # have a considerable size, we may want to store them on a different location - # than the device where the computation is happening. For asynchronous data - # collectors such as ours, different storing devices mean that the data that - # we collect won't sit on the same device each time, which is something that - # out training loop must account for. For simplicity, we set the devices to - # the same value for all sub-collectors. - - - def get_collector( - obs_norm_sd, - num_collectors, - actor_explore, - frames_per_batch, - total_frames, - device, - ): - data_collector = MultiaSyncDataCollector( - [ - make_env(parallel=True, obs_norm_sd=obs_norm_sd), - ] - * num_collectors, - policy=actor_explore, - frames_per_batch=frames_per_batch, - total_frames=total_frames, - # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode - exploration_mode="random", - # We set the all the devices to be identical. Below is an example of - # heterogeneous devices + else: + base_env = GymEnv( + "CartPole-v1", + from_pixels=True, + pixels_only=True, device=device, - storing_device=device, - split_trajs=False, - postproc=MultiStep(gamma=gamma, n_steps=5), ) - return data_collector - - - ############################################################################### - # Loss function - # ------------- - # - # Building our loss function is straightforward: we only need to provide - # the model and a bunch of hyperparameters to the DQNLoss class. - # - # Target parameters - # ~~~~~~~~~~~~~~~~~ - # - # Many off-policy RL algorithms use the concept of "target parameters" when it - # comes to estimate the value of the next state or state-action pair. - # The target parameters are lagged copies of the model parameters. Because - # their predictions mismatch those of the current model configuration, they - # help learning by putting a pessimistic bound on the value being estimated. - # This is a powerful trick (known as "Double Q-Learning") that is ubiquitous - # in similar algorithms. - # - - - def get_loss_module(actor, gamma): - loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) - target_updater = SoftUpdate(loss_module) - return loss_module, target_updater - - - ############################################################################### - # Hyperparameters - # --------------- - # - # Let's start with our hyperparameters. The following setting should work well - # in practice, and the performance of the algorithm should hopefully not be - # too sensitive to slight variations of these. 
- - device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" - - ############################################################################### - # Optimizer - # ~~~~~~~~~ - - # the learning rate of the optimizer - lr = 2e-3 - # weight decay - wd = 1e-5 - # the beta parameters of Adam - betas = (0.9, 0.999) - # Optimization steps per batch collected (aka UPD or updates per data) - n_optim = 8 - - ############################################################################### - # DQN parameters - # ~~~~~~~~~~~~~~ - # gamma decay factor - gamma = 0.99 - - ############################################################################### - # Smooth target network update decay parameter. - # This loosely corresponds to a 1/tau interval with hard target network - # update - tau = 0.02 - - ############################################################################### - # Data collection and replay buffer - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # .. note:: - # Values to be used for proper training have been commented. - # - # Total frames collected in the environment. In other implementations, the - # user defines a maximum number of episodes. - # This is harder to do with our data collectors since they return batches - # of N collected frames, where N is a constant. - # However, one can easily get the same restriction on number of episodes by - # breaking the training loop when a certain number - # episodes has been collected. - total_frames = 10_000 # 500000 - - ############################################################################### - # Random frames used to initialize the replay buffer. - init_random_frames = 100 # 1000 - - ############################################################################### - # Frames in each batch collected. - frames_per_batch = 32 # 128 - - ############################################################################### - # Frames sampled from the replay buffer at each optimization step - batch_size = 32 # 256 - - ############################################################################### - # Size of the replay buffer in terms of frames - buffer_size = min(total_frames, 100000) - - ############################################################################### - # Number of environments run in parallel in each data collector - num_workers = 2 # 8 - num_collectors = 2 # 4 - - ############################################################################### - # Environment and exploration - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - # We set the initial and final value of the epsilon factor in Epsilon-greedy - # exploration. - # Since our policy is deterministic, exploration is crucial: without it, the - # only source of randomness would be the environment reset. - - eps_greedy_val = 0.1 - eps_greedy_val_env = 0.005 - - ############################################################################### - # To speed up learning, we set the bias of the last layer of our value network - # to a predefined value (this is not mandatory) - init_bias = 2.0 - - ############################################################################### - # .. note:: - # For fast rendering of the tutorial ``total_frames`` hyperparameter - # was set to a very low number. To get a reasonable performance, use a greater - # value e.g. 
500000 - # - - ############################################################################### - # Building a Trainer - # ------------------ - # - # TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the - # following keyword-only arguments: - # - # - ``collector`` - # - ``loss_module`` - # - ``optimizer`` - # - ``logger``: A logger can be - # - ``total_frames``: this parameter defines the lifespan of the trainer. - # - ``frame_skip``: when a frame-skip is used, the collector must be made - # aware of it in order to accurately count the number of frames - # collected etc. Making the trainer aware of this parameter is not - # mandatory but helps to have a fairer comparison between settings where - # the total number of frames (budget) is fixed but the frame-skip is - # variable. - - stats = get_norm_stats() - test_env = make_env(parallel=False, obs_norm_sd=stats) - # Get model - actor, actor_explore = make_model(test_env) - loss_module, target_net_updater = get_loss_module(actor, gamma) - target_net_updater.init_() - - collector = get_collector( - stats, num_collectors, actor_explore, frames_per_batch, total_frames, device + + env = TransformedEnv( + base_env, + Compose( + StepCounter(), # to count the steps of each trajectory + ToTensorImage(), + RewardScaling(loc=0.0, scale=0.1), + GrayScale(), + Resize(64, 64), + CatFrames(4, in_keys=["pixels"], dim=-3), + ObservationNorm(in_keys=["pixels"], **obs_norm_sd), + ), ) - optimizer = torch.optim.Adam( - loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas + return env + + +############################################################################### +# Compute normalizing constants +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# To normalize images, we don't want to normalize each pixel independently +# with a full ``[C, W, H]`` normalizing mask, but with simpler ``[C, 1, 1]`` +# shaped set of normalizing constants (loc and scale parameters). +# We will be using the ``reduce_dim`` argument +# of :meth:`torchrl.envs.ObservationNorm.init_stats` to instruct which +# dimensions must be reduced, and the ``keep_dims`` parameter to ensure that +# not all dimensions disappear in the process: +# + + +def get_norm_stats(): + test_env = make_env() + test_env.transform[-1].init_stats( + num_iter=1000, cat_dim=0, reduce_dim=[-1, -2, -4], keep_dims=(-1, -2) ) - exp_name = f"dqn_exp_{uuid.uuid1()}" - tmpdir = tempfile.TemporaryDirectory() - logger = CSVLogger(exp_name=exp_name, log_dir=tmpdir.name) - warnings.warn(f"log dir: {logger.experiment.log_dir}") - - ############################################################################### - # We can control how often the scalars should be logged. Here we set this - # to a low value as our training loop is short: - - log_interval = 500 - - trainer = Trainer( - collector=collector, - total_frames=total_frames, - frame_skip=1, - loss_module=loss_module, - optimizer=optimizer, - logger=logger, - optim_steps_per_batch=n_optim, - log_interval=log_interval, + obs_norm_sd = test_env.transform[-1].state_dict() + # let's check that normalizing constants have a size of ``[C, 1, 1]`` where + # ``C=4`` (because of :class:`torchrl.envs.CatFrames`). 
+ print("state dict of the observation norm:", obs_norm_sd) + return obs_norm_sd + + +############################################################################### +# Building the model (Deep Q-network) +# ----------------------------------- +# +# The following function builds a :class:`torchrl.modules.DuelingCnnDQNet` +# object which is a simple CNN followed by a two-layer MLP. The only trick used +# here is that the action values (i.e. left and right action value) are +# computed using +# +# .. math:: +# +# \mathbb{v} = b(obs) + v(obs) - \mathbb{E}[v(obs)] +# +# where :math:`\mathbb{v}` is our vector of action values, +# :math:`b` is a :math:`\mathbb{R}^n \rightarrow 1` function and :math:`v` is a +# :math:`\mathbb{R}^n \rightarrow \mathbb{R}^m` function, for +# :math:`n = \# obs` and :math:`m = \# actions`. +# +# Our network is wrapped in a :class:`torchrl.modules.QValueActor`, +# which will read the state-action +# values, pick up the one with the maximum value and write all those results +# in the input :class:`tensordict.TensorDict`. +# + + +def make_model(dummy_env): + cnn_kwargs = { + "num_cells": [32, 64, 64], + "kernel_sizes": [6, 4, 3], + "strides": [2, 2, 1], + "activation_class": nn.ELU, + # This can be used to reduce the size of the last layer of the CNN + # "squeeze_output": True, + # "aggregator_class": nn.AdaptiveAvgPool2d, + # "aggregator_kwargs": {"output_size": (1, 1)}, + } + mlp_kwargs = { + "depth": 2, + "num_cells": [ + 64, + 64, + ], + "activation_class": nn.ELU, + } + net = DuelingCnnDQNet( + dummy_env.action_spec.shape[-1], 1, cnn_kwargs, mlp_kwargs + ).to(device) + net.value[-1].bias.data.fill_(init_bias) + + actor = QValueActor(net, in_keys=["pixels"], spec=dummy_env.action_spec).to(device) + # init actor: because the model is composed of lazy conv/linear layers, + # we must pass a fake batch of data through it to instantiate them. + tensordict = dummy_env.fake_tensordict() + actor(tensordict) + + # we wrap our actor in an EGreedyWrapper for data collection + actor_explore = EGreedyWrapper( + actor, + annealing_num_steps=total_frames, + eps_init=eps_greedy_val, + eps_end=eps_greedy_val_env, ) - ############################################################################### - # Registering hooks - # ~~~~~~~~~~~~~~~~~ - # - # Registering hooks can be achieved in two separate ways: - # - # - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` - # method is the first choice. One just needs to provide the trainer as input - # and the hook will be registered with a default name at a default location. - # For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` - # requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which - # can be cumbersome to implement. - buffer_hook = ReplayBufferTrainer( - get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), - flatten_tensordicts=True, + return actor, actor_explore + + +############################################################################### +# Collecting and storing data +# --------------------------- +# +# Replay buffers +# ~~~~~~~~~~~~~~ +# +# Replay buffers play a central role in off-policy RL algorithms such as DQN. +# They constitute the dataset we will be sampling from during training. +# +# Here, we will use a regular sampling strategy, although a prioritized RB +# could improve the performance significantly. +# +# We place the storage on disk using +# :class:`torchrl.data.replay_buffers.storages.LazyMemmapStorage` class. 
This +# storage is created in a lazy manner: it will only be instantiated once the +# first batch of data is passed to it. +# +# The only requirement of this storage is that the data passed to it at write +# time must always have the same shape. + + +def get_replay_buffer(buffer_size, n_optim, batch_size): + replay_buffer = TensorDictReplayBuffer( + batch_size=batch_size, + storage=LazyMemmapStorage(buffer_size), + prefetch=n_optim, ) - buffer_hook.register(trainer) - weight_updater = UpdateWeights(collector, update_weights_interval=1) - weight_updater.register(trainer) - recorder = Recorder( - record_interval=100, # log every 100 optimization steps - record_frames=1000, # maximum number of frames in the record - frame_skip=1, - policy_exploration=actor_explore, - environment=test_env, - exploration_mode="mode", - log_keys=[("next", "reward")], - out_keys={("next", "reward"): "rewards"}, - log_pbar=True, + return replay_buffer + + +############################################################################### +# Data collector +# ~~~~~~~~~~~~~~ +# +# As in `PPO `_ and +# `DDPG `_, we will be using +# a data collector as a dataloader in the outer loop. +# +# We choose the following configuration: we will be running a series of +# parallel environments synchronously in parallel in different collectors, +# themselves running in parallel but asynchronously. +# The advantage of this configuration is that we can balance the amount of +# compute that is executed in batch with what we want to be executed +# asynchronously. We encourage the reader to experiment how the collection +# speed is impacted by modifying the number of collectors (ie the number of +# environment constructors passed to the collector) and the number of +# environment executed in parallel in each collector (controlled by the +# ``num_workers`` hyperparameter). +# +# When building the collector, we can choose on which device we want the +# environment and policy to execute the operations through the ``device`` +# keyword argument. The ``storing_devices`` argument will modify the +# location of the data being collected: if the batches that we are gathering +# have a considerable size, we may want to store them on a different location +# than the device where the computation is happening. For asynchronous data +# collectors such as ours, different storing devices mean that the data that +# we collect won't sit on the same device each time, which is something that +# out training loop must account for. For simplicity, we set the devices to +# the same value for all sub-collectors. + + +def get_collector( + obs_norm_sd, + num_collectors, + actor_explore, + frames_per_batch, + total_frames, + device, +): + data_collector = MultiaSyncDataCollector( + [ + make_env(parallel=True, obs_norm_sd=obs_norm_sd), + ] + * num_collectors, + policy=actor_explore, + frames_per_batch=frames_per_batch, + total_frames=total_frames, + # this is the default behaviour: the collector runs in ``"random"`` (or explorative) mode + exploration_mode="random", + # We set the all the devices to be identical. Below is an example of + # heterogeneous devices + device=device, + storing_device=device, + split_trajs=False, + postproc=MultiStep(gamma=gamma, n_steps=5), ) - recorder.register(trainer) - - ############################################################################### - # - Any callable (including :class:`torchrl.trainers.TrainerHookBase` - # subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. 
- # In this case, a location must be explicitly passed (). This method gives - # more control over the location of the hook but it also requires more - # understanding of the Trainer mechanism. - # Check the `trainer documentation `_ - # for a detailed description of the trainer hooks. - # - trainer.register_op("post_optim", target_net_updater.step) - - ############################################################################### - # We can log the training rewards too. Note that this is of limited interest - # with CartPole, as rewards are always 1. The discounted sum of rewards is - # maximised not by getting higher rewards but by keeping the cart-pole alive - # for longer. - # This will be reflected by the `total_rewards` value displayed in the - # progress bar. - # - log_reward = LogReward(log_pbar=True) - log_reward.register(trainer) - - ############################################################################### - # .. note:: - # It is possible to link multiple optimizers to the trainer if needed. - # In this case, each optimizer will be tied to a field in the loss - # dictionary. - # Check the :class:`torchrl.trainers.OptimizerHook` to learn more. - # - # Here we are, ready to train our algorithm! A simple call to - # ``trainer.train()`` and we'll be getting our results logged in. - # - trainer.train() - - ############################################################################### - # We can now quickly check the CSVs with the results. - - - def print_csv_files_in_folder(folder_path): - """ - Find all CSV files in a folder and return the first 10 lines of each file as a string. - - Args: - folder_path (str): The relative path to the folder. - - Returns: - str: A string containing the first 10 lines of each CSV file in the folder. - """ - csv_files = [] - output_str = "" - for file in os.listdir(folder_path): - if file.endswith(".csv"): - csv_files.append(os.path.join(folder_path, file)) - for csv_file in csv_files: - output_str += f"File: {csv_file}\n" - with open(csv_file, "r") as f: - for i, line in enumerate(f): - if i == 10: - break - output_str += line.strip() + "\n" - output_str += "\n" - return output_str - - - print_csv_files_in_folder(logger.experiment.log_dir) - - ############################################################################### - # Conclusion and possible improvements - # ------------------------------------ - # - # In this tutorial we have learned: - # - # - How to write a Trainer, including building its components and registering - # them in the trainer; - # - How to code a DQN algorithm, including how to create a policy that picks - # up the action with the highest value with - # :class:`torchrl.modules.QValueNetwork`; - # - How to build a multiprocessed data collector; - # - # Possible improvements to this tutorial could include: - # - # - A prioritized replay buffer could also be used. This will give a - # higher priority to samples that have the worst value accuracy. - # Learn more on the - # `replay buffer section `_ - # of the documentation. - # - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` - # for more information). - # - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. + return data_collector + + +############################################################################### +# Loss function +# ------------- +# +# Building our loss function is straightforward: we only need to provide +# the model and a bunch of hyperparameters to the DQNLoss class. 
+# +# Target parameters +# ~~~~~~~~~~~~~~~~~ +# +# Many off-policy RL algorithms use the concept of "target parameters" when it +# comes to estimate the value of the next state or state-action pair. +# The target parameters are lagged copies of the model parameters. Because +# their predictions mismatch those of the current model configuration, they +# help learning by putting a pessimistic bound on the value being estimated. +# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous +# in similar algorithms. +# + + +def get_loss_module(actor, gamma): + loss_module = DQNLoss(actor, gamma=gamma, delay_value=True) + target_updater = SoftUpdate(loss_module) + return loss_module, target_updater + + +############################################################################### +# Hyperparameters +# --------------- +# +# Let's start with our hyperparameters. The following setting should work well +# in practice, and the performance of the algorithm should hopefully not be +# too sensitive to slight variations of these. + +device = "cuda:0" if torch.cuda.device_count() > 0 else "cpu" + +############################################################################### +# Optimizer +# ~~~~~~~~~ + +# the learning rate of the optimizer +lr = 2e-3 +# weight decay +wd = 1e-5 +# the beta parameters of Adam +betas = (0.9, 0.999) +# Optimization steps per batch collected (aka UPD or updates per data) +n_optim = 8 + +############################################################################### +# DQN parameters +# ~~~~~~~~~~~~~~ +# gamma decay factor +gamma = 0.99 + +############################################################################### +# Smooth target network update decay parameter. +# This loosely corresponds to a 1/tau interval with hard target network +# update +tau = 0.02 + +############################################################################### +# Data collection and replay buffer +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# .. note:: +# Values to be used for proper training have been commented. +# +# Total frames collected in the environment. In other implementations, the +# user defines a maximum number of episodes. +# This is harder to do with our data collectors since they return batches +# of N collected frames, where N is a constant. +# However, one can easily get the same restriction on number of episodes by +# breaking the training loop when a certain number +# episodes has been collected. +total_frames = 10_000 # 500000 + +############################################################################### +# Random frames used to initialize the replay buffer. +init_random_frames = 100 # 1000 + +############################################################################### +# Frames in each batch collected. 
+frames_per_batch = 32 # 128 + +############################################################################### +# Frames sampled from the replay buffer at each optimization step +batch_size = 32 # 256 + +############################################################################### +# Size of the replay buffer in terms of frames +buffer_size = min(total_frames, 100000) + +############################################################################### +# Number of environments run in parallel in each data collector +num_workers = 2 # 8 +num_collectors = 2 # 4 + +############################################################################### +# Environment and exploration +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# We set the initial and final value of the epsilon factor in Epsilon-greedy +# exploration. +# Since our policy is deterministic, exploration is crucial: without it, the +# only source of randomness would be the environment reset. + +eps_greedy_val = 0.1 +eps_greedy_val_env = 0.005 + +############################################################################### +# To speed up learning, we set the bias of the last layer of our value network +# to a predefined value (this is not mandatory) +init_bias = 2.0 + +############################################################################### +# .. note:: +# For fast rendering of the tutorial ``total_frames`` hyperparameter +# was set to a very low number. To get a reasonable performance, use a greater +# value e.g. 500000 +# + +############################################################################### +# Building a Trainer +# ------------------ +# +# TorchRL's :class:`torchrl.trainers.Trainer` class constructor takes the +# following keyword-only arguments: +# +# - ``collector`` +# - ``loss_module`` +# - ``optimizer`` +# - ``logger``: A logger can be +# - ``total_frames``: this parameter defines the lifespan of the trainer. +# - ``frame_skip``: when a frame-skip is used, the collector must be made +# aware of it in order to accurately count the number of frames +# collected etc. Making the trainer aware of this parameter is not +# mandatory but helps to have a fairer comparison between settings where +# the total number of frames (budget) is fixed but the frame-skip is +# variable. + +stats = get_norm_stats() +test_env = make_env(parallel=False, obs_norm_sd=stats) +# Get model +actor, actor_explore = make_model(test_env) +loss_module, target_net_updater = get_loss_module(actor, gamma) +target_net_updater.init_() + +collector = get_collector( + stats, num_collectors, actor_explore, frames_per_batch, total_frames, device +) +optimizer = torch.optim.Adam( + loss_module.parameters(), lr=lr, weight_decay=wd, betas=betas +) +exp_name = f"dqn_exp_{uuid.uuid1()}" +tmpdir = tempfile.TemporaryDirectory() +logger = CSVLogger(exp_name=exp_name, log_dir=tmpdir.name) +warnings.warn(f"log dir: {logger.experiment.log_dir}") + +############################################################################### +# We can control how often the scalars should be logged. 
Here we set this +# to a low value as our training loop is short: + +log_interval = 500 + +trainer = Trainer( + collector=collector, + total_frames=total_frames, + frame_skip=1, + loss_module=loss_module, + optimizer=optimizer, + logger=logger, + optim_steps_per_batch=n_optim, + log_interval=log_interval, +) + +############################################################################### +# Registering hooks +# ~~~~~~~~~~~~~~~~~ +# +# Registering hooks can be achieved in two separate ways: +# +# - If the hook has it, the :meth:`torchrl.trainers.TrainerHookBase.register` +# method is the first choice. One just needs to provide the trainer as input +# and the hook will be registered with a default name at a default location. +# For some hooks, the registration can be quite complex: :class:`torchrl.trainers.ReplayBufferTrainer` +# requires 3 hooks (``extend``, ``sample`` and ``update_priority``) which +# can be cumbersome to implement. +buffer_hook = ReplayBufferTrainer( + get_replay_buffer(buffer_size, n_optim, batch_size=batch_size), + flatten_tensordicts=True, +) +buffer_hook.register(trainer) +weight_updater = UpdateWeights(collector, update_weights_interval=1) +weight_updater.register(trainer) +recorder = Recorder( + record_interval=100, # log every 100 optimization steps + record_frames=1000, # maximum number of frames in the record + frame_skip=1, + policy_exploration=actor_explore, + environment=test_env, + exploration_mode="mode", + log_keys=[("next", "reward")], + out_keys={("next", "reward"): "rewards"}, + log_pbar=True, +) +recorder.register(trainer) + +############################################################################### +# - Any callable (including :class:`torchrl.trainers.TrainerHookBase` +# subclasses) can be registered using :meth:`torchrl.trainers.Trainer.register_op`. +# In this case, a location must be explicitly passed (). This method gives +# more control over the location of the hook but it also requires more +# understanding of the Trainer mechanism. +# Check the `trainer documentation `_ +# for a detailed description of the trainer hooks. +# +trainer.register_op("post_optim", target_net_updater.step) + +############################################################################### +# We can log the training rewards too. Note that this is of limited interest +# with CartPole, as rewards are always 1. The discounted sum of rewards is +# maximised not by getting higher rewards but by keeping the cart-pole alive +# for longer. +# This will be reflected by the `total_rewards` value displayed in the +# progress bar. +# +log_reward = LogReward(log_pbar=True) +log_reward.register(trainer) + +############################################################################### +# .. note:: +# It is possible to link multiple optimizers to the trainer if needed. +# In this case, each optimizer will be tied to a field in the loss +# dictionary. +# Check the :class:`torchrl.trainers.OptimizerHook` to learn more. +# +# Here we are, ready to train our algorithm! A simple call to +# ``trainer.train()`` and we'll be getting our results logged in. +# +trainer.train() + +############################################################################### +# We can now quickly check the CSVs with the results. + + +def print_csv_files_in_folder(folder_path): + """ + Find all CSV files in a folder and return the first 10 lines of each file as a string. + + Args: + folder_path (str): The relative path to the folder. 
+ + Returns: + str: A string containing the first 10 lines of each CSV file in the folder. + """ + csv_files = [] + output_str = "" + for file in os.listdir(folder_path): + if file.endswith(".csv"): + csv_files.append(os.path.join(folder_path, file)) + for csv_file in csv_files: + output_str += f"File: {csv_file}\n" + with open(csv_file, "r") as f: + for i, line in enumerate(f): + if i == 10: + break + output_str += line.strip() + "\n" + output_str += "\n" + return output_str + + +print_csv_files_in_folder(logger.experiment.log_dir) + +############################################################################### +# Conclusion and possible improvements +# ------------------------------------ +# +# In this tutorial we have learned: +# +# - How to write a Trainer, including building its components and registering +# them in the trainer; +# - How to code a DQN algorithm, including how to create a policy that picks +# up the action with the highest value with +# :class:`torchrl.modules.QValueNetwork`; +# - How to build a multiprocessed data collector; +# +# Possible improvements to this tutorial could include: +# +# - A prioritized replay buffer could also be used. This will give a +# higher priority to samples that have the worst value accuracy. +# Learn more on the +# `replay buffer section `_ +# of the documentation. +# - A distributional loss (see :class:`torchrl.objectives.DistributionalDQNLoss` +# for more information). +# - More fancy exploration techniques, such as :class:`torchrl.modules.NoisyLinear` layers and such. From 7d65ca46500d4d5a53b4aef3b8ad181e8c99bba4 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 5 Apr 2023 17:36:43 +0100 Subject: [PATCH 86/89] remove prints --- torchrl/trainers/trainers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/torchrl/trainers/trainers.py b/torchrl/trainers/trainers.py index 4a04acd4c98..69f33b796de 100644 --- a/torchrl/trainers/trainers.py +++ b/torchrl/trainers/trainers.py @@ -506,7 +506,6 @@ def _log(self, log_pbar=False, **kwargs) -> None: collected_frames = self.collected_frames for key, item in kwargs.items(): self._log_dict[key].append(item) - print(f"collected_frames {collected_frames}, self._last_log.get({key}, 0) {self._last_log.get(key, 0)}, self._log_interval {self._log_interval}") if (collected_frames - self._last_log.get(key, 0)) > self._log_interval: self._last_log[key] = collected_frames _log = True @@ -514,7 +513,6 @@ def _log(self, log_pbar=False, **kwargs) -> None: _log = False method = LOGGER_METHODS.get(key, "log_scalar") if _log and self.logger is not None: - print("logging!", key, self.logger.experiment.log_dir) getattr(self.logger, method)(key, item, step=collected_frames) if method == "log_scalar" and self.progress_bar and log_pbar: if isinstance(item, torch.Tensor): From 0d238d51b4b7b098b20972892e5a0446df01c816 Mon Sep 17 00:00:00 2001 From: vmoens Date: Wed, 5 Apr 2023 18:24:40 +0100 Subject: [PATCH 87/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 47268647e71..f07ec471f3a 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -659,19 +659,18 @@ def get_loss_module(actor, gamma): def print_csv_files_in_folder(folder_path): """ - Find all CSV files in a folder and return the first 10 lines of each file as a string. 
+ Find all CSV files in a folder and prints the first 10 lines of each file. Args: folder_path (str): The relative path to the folder. - Returns: - str: A string containing the first 10 lines of each CSV file in the folder. """ csv_files = [] output_str = "" - for file in os.listdir(folder_path): - if file.endswith(".csv"): - csv_files.append(os.path.join(folder_path, file)) + for dirpath, _, filenames in os.walk(folder_path): + for file in filenames: + if file.endswith(".csv"): + csv_files.append(os.path.join(dirpath, file)) for csv_file in csv_files: output_str += f"File: {csv_file}\n" with open(csv_file, "r") as f: @@ -680,10 +679,10 @@ def print_csv_files_in_folder(folder_path): break output_str += line.strip() + "\n" output_str += "\n" - return output_str + print(output_str) -print_csv_files_in_folder(logger.experiment.log_dir) +print_csv_files_in_folder("/var/folders/zs/9lq15k8x61l1g0c_sf__63c80000gn/T/tmpejpilvhb/dqn_exp_6d35f974-d3c2-11ed-8df6-acde48001122") ############################################################################### # Conclusion and possible improvements From 33133cbe11efafaee7ba58936b2afbc1236e998a Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 6 Apr 2023 11:07:35 +0100 Subject: [PATCH 88/89] amend --- tutorials/sphinx-tutorials/coding_ddpg.py | 5 +++-- tutorials/sphinx-tutorials/coding_dqn.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 914186f4ed9..53a6ae10e47 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -1179,8 +1179,9 @@ def ceil_div(x, y): # We make a simple plot of the average rewards during training. We can observe # that our policy learned quite well to solve the task. # -# **Note**: As already mentioned above, to get a more reasonable performance, -# use a greater value for ``total_frames`` e.g. 1M. +# .. note:: +# As already mentioned above, to get a more reasonable performance, +# use a greater value for ``total_frames`` e.g. 1M. from matplotlib import pyplot as plt diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index f07ec471f3a..7b03e13af15 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -486,7 +486,7 @@ def get_loss_module(actor, gamma): # However, one can easily get the same restriction on number of episodes by # breaking the training loop when a certain number # episodes has been collected. -total_frames = 10_000 # 500000 +total_frames = 5_000 # 500000 ############################################################################### # Random frames used to initialize the replay buffer. 
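###############################################################################
# Side note on the comment above about limiting the number of episodes rather
# than the number of frames: a rough, hypothetical sketch of such a loop
# (``collector`` and ``max_episodes`` are assumptions, not part of the patched
# tutorial) could look like this:
#
# .. code-block:: python
#
#     max_episodes = 500
#     n_episodes = 0
#     for batch in collector:
#         # every True entry of ("next", "done") marks the end of an episode
#         n_episodes += batch["next", "done"].sum().item()
#         if n_episodes >= max_episodes:
#             break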
From c221982dbbce16d3427a9bc150e7c7ada953518a Mon Sep 17 00:00:00 2001 From: vmoens Date: Thu, 6 Apr 2023 12:35:42 +0100 Subject: [PATCH 89/89] amend --- tutorials/sphinx-tutorials/coding_dqn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index 7b03e13af15..4603cecf37f 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -682,7 +682,7 @@ def print_csv_files_in_folder(folder_path): print(output_str) -print_csv_files_in_folder("/var/folders/zs/9lq15k8x61l1g0c_sf__63c80000gn/T/tmpejpilvhb/dqn_exp_6d35f974-d3c2-11ed-8df6-acde48001122") +print_csv_files_in_folder(logger.experiment.log_dir) ############################################################################### # Conclusion and possible improvements