Update

Vincent Moens · Vincent Moens · commit f4d223c16228 · 2024-10-15T14:02:32.000+01:00
[ghstack-poisoned]
diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst
@@ -972,6 +972,26 @@ The following classes are deprecated and just point to the classes above:
     UnboundedContinuousTensorSpec
     UnboundedDiscreteTensorSpec
 
+Trees and Forests
+-----------------
+
+TorchRL offers a set of classes and functions that can be used to represent trees and forests efficiently.
+
+.. currentmodule:: torchrl.data
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    BinaryToDecimal
+    HashToInt
+    QueryModule
+    RandomProjectionHash
+    SipHash
+    TensorDictMap
+    TensorMap
+
+
 Reinforcement Learning From Human Feedback (RLHF)
 -------------------------------------------------
 
diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py
@@ -26,6 +26,7 @@
 )
 from tensordict.base import _NESTED_TENSORS_AS_LISTS
 from tensordict.memmap import MemoryMappedTensor
+from tensordict.utils import _zip_strict
 from torch import multiprocessing as mp
 from torch.utils._pytree import tree_flatten, tree_map, tree_unflatten
 from torchrl._utils import _make_ordinal_device, implement_for, logger as torchrl_logger
@@ -258,7 +259,7 @@ def set(
                     np.ndarray,
                 ),
             ):
-                for _cursor, _data in zip(cursor, data, strict=True):
+                for _cursor, _data in _zip_strict(cursor, data):
                     self.set(_cursor, _data, set_cursor=set_cursor)
             else:
                 raise TypeError(
diff --git a/torchrl/objectives/cql.py b/torchrl/objectives/cql.py
@@ -514,32 +514,21 @@ def out_keys(self, values):
 
     @dispatch
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        shape = None
-        if tensordict.ndimension() > 1:
-            shape = tensordict.shape
-            tensordict_reshape = tensordict.reshape(-1)
-        else:
-            tensordict_reshape = tensordict
-
-        q_loss, metadata = self.q_loss(tensordict_reshape)
-        cql_loss, cql_metadata = self.cql_loss(tensordict_reshape)
+        q_loss, metadata = self.q_loss(tensordict)
+        cql_loss, cql_metadata = self.cql_loss(tensordict)
         if self.with_lagrange:
-            alpha_prime_loss, alpha_prime_metadata = self.alpha_prime_loss(
-                tensordict_reshape
-            )
+            alpha_prime_loss, alpha_prime_metadata = self.alpha_prime_loss(tensordict)
             metadata.update(alpha_prime_metadata)
-        loss_actor_bc, bc_metadata = self.actor_bc_loss(tensordict_reshape)
-        loss_actor, actor_metadata = self.actor_loss(tensordict_reshape)
+        loss_actor_bc, bc_metadata = self.actor_bc_loss(tensordict)
+        loss_actor, actor_metadata = self.actor_loss(tensordict)
         loss_alpha, alpha_metadata = self.alpha_loss(actor_metadata)
         metadata.update(bc_metadata)
         metadata.update(cql_metadata)
         metadata.update(actor_metadata)
         metadata.update(alpha_metadata)
-        tensordict_reshape.set(
+        tensordict.set(
             self.tensor_keys.priority, metadata.pop("td_error").detach().max(0).values
         )
-        if shape:
-            tensordict.update(tensordict_reshape.view(shape))
         out = {
             "loss_actor": loss_actor,
             "loss_actor_bc": loss_actor_bc,
@@ -682,7 +671,9 @@ def _get_value_v(self, tensordict, _alpha, actor_params, qval_params):
                 )
                 # take max over actions
                 state_action_value = state_action_value.reshape(
-                    self.num_qvalue_nets, tensordict.shape[0], self.num_random, -1
+                    torch.Size(
+                        [self.num_qvalue_nets, *tensordict.shape, self.num_random, -1]
+                    )
                 ).max(-2)[0]
                 # take min over qvalue nets
                 next_state_value = state_action_value.min(0)[0]
@@ -739,14 +730,13 @@ def cql_loss(self, tensordict: TensorDictBase) -> Tuple[Tensor, dict]:
                 "This could be caused by calling cql_loss method before q_loss method."
             )
 
-        random_actions_tensor = (
-            torch.FloatTensor(
-                tensordict.shape[0] * self.num_random,
+        random_actions_tensor = pred_q1.new_empty(
+            (
+                *tensordict.shape[:-1],
+                tensordict.shape[-1] * self.num_random,
                 tensordict[self.tensor_keys.action].shape[-1],
             )
-            .uniform_(-1, 1)
-            .to(tensordict.device)
-        )
+        ).uniform_(-1, 1)
         curr_actions_td, curr_log_pis = self._get_policy_actions(
             tensordict.copy(),
             self.actor_network_params,
@@ -833,31 +823,31 @@ def filter_and_repeat(name, x):
                 q_new[0] - new_log_pis.detach().unsqueeze(-1),
                 q_curr[0] - curr_log_pis.detach().unsqueeze(-1),
             ],
-            1,
+            -1,
         )
         cat_q2 = torch.cat(
             [
                 q_random[1] - random_density,
                 q_new[1] - new_log_pis.detach().unsqueeze(-1),
                 q_curr[1] - curr_log_pis.detach().unsqueeze(-1),
             ],
-            1,
+            -1,
         )
 
         min_qf1_loss = (
-            torch.logsumexp(cat_q1 / self.temperature, dim=1)
+            torch.logsumexp(cat_q1 / self.temperature, dim=-1)
             * self.min_q_weight
             * self.temperature
         )
         min_qf2_loss = (
-            torch.logsumexp(cat_q2 / self.temperature, dim=1)
+            torch.logsumexp(cat_q2 / self.temperature, dim=-1)
             * self.min_q_weight
             * self.temperature
         )
 
         # Subtract the log likelihood of data
-        cql_q1_loss = min_qf1_loss - pred_q1 * self.min_q_weight
-        cql_q2_loss = min_qf2_loss - pred_q2 * self.min_q_weight
+        cql_q1_loss = min_qf1_loss.flatten() - pred_q1 * self.min_q_weight
+        cql_q2_loss = min_qf2_loss.flatten() - pred_q2 * self.min_q_weight
 
         # write cql losses in tensordict for alpha prime loss
         tensordict.set(self.tensor_keys.cql_q1_loss, cql_q1_loss)
@@ -1080,9 +1070,9 @@ def __init__(
         self.loss_function = loss_function
         if action_space is None:
             # infer from value net
-            try:
+            if hasattr(value_network, "action_space"):
                 action_space = value_network.spec
-            except AttributeError:
+            else:
                 # let's try with action_space then
                 try:
                     action_space = value_network.action_space
@@ -1205,8 +1195,6 @@ def value_loss(
         with torch.no_grad():
             td_error = (pred_val_index - target_value).pow(2)
             td_error = td_error.unsqueeze(-1)
-        if tensordict.device is not None:
-            td_error = td_error.to(tensordict.device)
 
         tensordict.set(
             self.tensor_keys.priority,
diff --git a/torchrl/objectives/crossq.py b/torchrl/objectives/crossq.py
@@ -495,23 +495,14 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
         To see what keys are expected in the input tensordict and what keys are expected as output, check the
         class's `"in_keys"` and `"out_keys"` attributes.
         """
-        shape = None
-        if tensordict.ndimension() > 1:
-            shape = tensordict.shape
-            tensordict_reshape = tensordict.reshape(-1)
-        else:
-            tensordict_reshape = tensordict
-
-        loss_qvalue, value_metadata = self.qvalue_loss(tensordict_reshape)
-        loss_actor, metadata_actor = self.actor_loss(tensordict_reshape)
+        loss_qvalue, value_metadata = self.qvalue_loss(tensordict)
+        loss_actor, metadata_actor = self.actor_loss(tensordict)
         loss_alpha = self.alpha_loss(log_prob=metadata_actor["log_prob"])
-        tensordict_reshape.set(self.tensor_keys.priority, value_metadata["td_error"])
+        tensordict.set(self.tensor_keys.priority, value_metadata["td_error"])
         if loss_actor.shape != loss_qvalue.shape:
             raise RuntimeError(
                 f"Losses shape mismatch: {loss_actor.shape} and {loss_qvalue.shape}"
             )
-        if shape:
-            tensordict.update(tensordict_reshape.view(shape))
         entropy = -metadata_actor["log_prob"]
         out = {
             "loss_actor": loss_actor,
diff --git a/torchrl/objectives/iql.py b/torchrl/objectives/iql.py
@@ -373,16 +373,9 @@ def _forward_value_estimator_keys(self, **kwargs) -> None:
 
     @dispatch
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        shape = None
-        if tensordict.ndimension() > 1:
-            shape = tensordict.shape
-            tensordict_reshape = tensordict.reshape(-1)
-        else:
-            tensordict_reshape = tensordict
-
-        loss_actor, metadata = self.actor_loss(tensordict_reshape)
-        loss_qvalue, metadata_qvalue = self.qvalue_loss(tensordict_reshape)
-        loss_value, metadata_value = self.value_loss(tensordict_reshape)
+        loss_actor, metadata = self.actor_loss(tensordict)
+        loss_qvalue, metadata_qvalue = self.qvalue_loss(tensordict)
+        loss_value, metadata_value = self.value_loss(tensordict)
         metadata.update(metadata_qvalue)
         metadata.update(metadata_value)
 
@@ -392,13 +385,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
             raise RuntimeError(
                 f"Losses shape mismatch: {loss_actor.shape}, {loss_qvalue.shape} and {loss_value.shape}"
             )
-        tensordict_reshape.set(
+        tensordict.set(
             self.tensor_keys.priority, metadata.pop("td_error").detach().max(0).values
         )
-        if shape:
-            tensordict.update(tensordict_reshape.view(shape))
-
-        entropy = -tensordict_reshape.get(self.tensor_keys.log_prob).detach()
+        entropy = -tensordict.get(self.tensor_keys.log_prob).detach()
         out = {
             "loss_actor": loss_actor,
             "loss_qvalue": loss_qvalue,
diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py
@@ -577,30 +577,21 @@ def out_keys(self, values):
 
     @dispatch
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        shape = None
-        if tensordict.ndimension() > 1:
-            shape = tensordict.shape
-            tensordict_reshape = tensordict.reshape(-1)
-        else:
-            tensordict_reshape = tensordict
-
         if self._version == 1:
-            loss_qvalue, value_metadata = self._qvalue_v1_loss(tensordict_reshape)
-            loss_value, _ = self._value_loss(tensordict_reshape)
+            loss_qvalue, value_metadata = self._qvalue_v1_loss(tensordict)
+            loss_value, _ = self._value_loss(tensordict)
         else:
-            loss_qvalue, value_metadata = self._qvalue_v2_loss(tensordict_reshape)
+            loss_qvalue, value_metadata = self._qvalue_v2_loss(tensordict)
             loss_value = None
-        loss_actor, metadata_actor = self._actor_loss(tensordict_reshape)
+        loss_actor, metadata_actor = self._actor_loss(tensordict)
         loss_alpha = self._alpha_loss(log_prob=metadata_actor["log_prob"])
-        tensordict_reshape.set(self.tensor_keys.priority, value_metadata["td_error"])
+        tensordict.set(self.tensor_keys.priority, value_metadata["td_error"])
         if (loss_actor.shape != loss_qvalue.shape) or (
             loss_value is not None and loss_actor.shape != loss_value.shape
         ):
             raise RuntimeError(
                 f"Losses shape mismatch: {loss_actor.shape}, {loss_qvalue.shape} and {loss_value.shape}"
             )
-        if shape:
-            tensordict.update(tensordict_reshape.view(shape))
         entropy = -metadata_actor["log_prob"]
         out = {
             "loss_actor": loss_actor,
@@ -1158,26 +1149,17 @@ def in_keys(self, values):
 
     @dispatch
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        shape = None
-        if tensordict.ndimension() > 1:
-            shape = tensordict.shape
-            tensordict_reshape = tensordict.reshape(-1)
-        else:
-            tensordict_reshape = tensordict
-
-        loss_value, metadata_value = self._value_loss(tensordict_reshape)
-        loss_actor, metadata_actor = self._actor_loss(tensordict_reshape)
+        loss_value, metadata_value = self._value_loss(tensordict)
+        loss_actor, metadata_actor = self._actor_loss(tensordict)
         loss_alpha = self._alpha_loss(
             log_prob=metadata_actor["log_prob"],
         )
 
-        tensordict_reshape.set(self.tensor_keys.priority, metadata_value["td_error"])
+        tensordict.set(self.tensor_keys.priority, metadata_value["td_error"])
         if loss_actor.shape != loss_value.shape:
             raise RuntimeError(
                 f"Losses shape mismatch: {loss_actor.shape}, and {loss_value.shape}"
             )
-        if shape:
-            tensordict.update(tensordict_reshape.view(shape))
         entropy = -metadata_actor["log_prob"]
         out = {
             "loss_actor": loss_actor,