pytorch
diff --git a/‎.github/unittest/linux_libs/scripts_gym/setup_env.sh
Lines changed: 0 additions & 1 deletion b/‎.github/unittest/linux_libs/scripts_gym/setup_env.sh
Lines changed: 0 additions & 1 deletion
diff --git a/‎.github/unittest/linux_libs/scripts_isaaclab/isaac.sh
Lines changed: 81 additions & 0 deletions b/‎.github/unittest/linux_libs/scripts_isaaclab/isaac.sh
Lines changed: 81 additions & 0 deletions
diff --git a/‎.github/workflows/test-linux-libs.yml
Lines changed: 18 additions & 0 deletions b/‎.github/workflows/test-linux-libs.yml
Lines changed: 18 additions & 0 deletions
diff --git a/‎.gitignore
Lines changed: 3 additions & 0 deletions b/‎.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/source/reference/envs.rst
Lines changed: 2 additions & 0 deletions b/‎docs/source/reference/envs.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/trees/mcts.py
Lines changed: 58 additions & 24 deletions b/‎examples/trees/mcts.py
Lines changed: 58 additions & 24 deletions
diff --git a/‎sota-implementations/sac/utils.py
Lines changed: 4 additions & 4 deletions b/‎sota-implementations/sac/utils.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎test/mocking_classes.py
Lines changed: 56 additions & 1 deletion b/‎test/mocking_classes.py
Lines changed: 56 additions & 1 deletion
diff --git a/‎test/test_collector.py
Lines changed: 1 addition & 1 deletion b/‎test/test_collector.py
Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,6 @@ set -e
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 # Avoid error: "fatal: unsafe repository"
 apt-get update && apt-get install -y git wget gcc g++
-
 apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
 apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev
 
 
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# CI entry point: builds a conda env with IsaacSim/IsaacLab, tensordict and torchrl, then runs the isaac tests.
+set -e
+set -v  # print script lines as they are read so the CI log shows progress
+
+#if [[ "${{ github.ref }}" =~ release/* ]]; then
+#  export RELEASE=1
+#  export TORCH_VERSION=stable
+#else
+export RELEASE=0
+export TORCH_VERSION=nightly
+#fi
+
+set -euo pipefail
+export PYTHON_VERSION="3.10"
+export CU_VERSION="12.8"
+export TAR_OPTIONS="--no-same-owner"
+export UPLOAD_CHANNEL="nightly"
+export TF_CPP_MIN_LOG_LEVEL=0
+export BATCHED_PIPE_TIMEOUT=60
+export TD_GET_DEFAULTS_TO_NONE=1
+export OMNI_KIT_ACCEPT_EULA=yes
+
+nvidia-smi
+
+# Setup
+apt-get update && apt-get install -y git wget gcc g++
+apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
+apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev
+
+git config --global --add safe.directory '*'
+root_dir="$(git rev-parse --show-toplevel)"
+conda_dir="${root_dir}/conda"
+env_dir="${root_dir}/env"
+lib_dir="${env_dir}/lib"
+
+cd "${root_dir}"
+
+# install conda (fetched over HTTPS because the installer is executed right after download)
+printf "* Installing conda\n"
+wget -O miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+bash ./miniconda.sh -b -f -p "${conda_dir}"
+eval "$("${conda_dir}/bin/conda" shell.bash hook)"
+
+# create and activate the test env; the python version follows PYTHON_VERSION
+conda create --prefix "${env_dir}" python="${PYTHON_VERSION}" -y
+conda activate "${env_dir}"
+
+# Pin pytorch to 2.5.1 for IsaacLab (TORCH_VERSION and CU_VERSION above are not consulted by this install)
+conda install pytorch==2.5.1 torchvision==0.20.1 pytorch-cuda=12.4 -c pytorch -c nvidia -y
+
+conda run -p "${env_dir}" pip install --upgrade pip
+conda run -p "${env_dir}" pip install 'isaacsim[all,extscache]==4.5.0' --extra-index-url https://pypi.nvidia.com
+conda install conda-forge::"cmake>3.22" -y
+
+git clone https://github.com/isaac-sim/IsaacLab.git
+cd IsaacLab
+conda run -p "${env_dir}" ./isaaclab.sh --install sb3
+cd ../
+
+# install tensordict (from source unless RELEASE is set to something other than 0)
+if [[ "$RELEASE" == 0 ]]; then
+  conda install "anaconda::cmake>=3.22" -y
+  conda run -p "${env_dir}" python3 -m pip install "pybind11[global]"
+  conda run -p "${env_dir}" python3 -m pip install git+https://github.com/pytorch/tensordict.git
+else
+  conda run -p "${env_dir}" python3 -m pip install tensordict
+fi
+
+# smoke test
+conda run -p "${env_dir}" python -c "import tensordict"
+
+printf "* Installing torchrl\n"
+conda run -p "${env_dir}" python setup.py develop
+conda run -p "${env_dir}" python -c "import torchrl"
+
+# Install pytest
+conda run -p "${env_dir}" python -m pip install pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures pytest-error-for-skips pytest-asyncio
+
+# Run tests (only those selected by "-k isaac")
+conda run -p "${env_dir}" python -m pytest test/test_libs.py -k isaac -s
@@ -230,6 +230,24 @@ jobs:
         ./.github/unittest/linux_libs/scripts_gym/batch_scripts.sh
         ./.github/unittest/linux_libs/scripts_gym/post_process.sh
 
+  unittests-isaaclab:  # runs scripts_isaaclab/isaac.sh through a reusable workflow
+    strategy:
+      matrix:
+        python_version: ["3.10"]  # NOTE(review): not referenced in this job's inputs — confirm it is still needed
+        cuda_arch_version: ["12.8"]  # NOTE(review): gpu-arch-version below hard-codes the same value instead of reading the matrix
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments/Isaac') }}
+    uses: vmoens/test-infra/.github/workflows/isaac_linux_job_v2.yml@main  # NOTE(review): fork referenced at a moving branch — confirm this is intended
+    with:
+      repository: pytorch/rl
+      runner: "linux.g5.4xlarge.nvidia.gpu"
+      docker-image: "nvcr.io/nvidia/isaac-lab:2.1.0"
+      test-infra-repository: vmoens/test-infra
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.8"
+      timeout: 120
+      script: |
+        ./.github/unittest/linux_libs/scripts_isaaclab/isaac.sh
+
   unittests-jumanji:
     strategy:
       matrix:
 
@@ -56,6 +56,9 @@ cover/
 *.mo
 *.pot
 
+# Jupyter
+*.ipynb
+
 # Django stuff:
 *.log
 local_settings.py
 
@@ -1112,6 +1112,7 @@ to be able to create this other composition:
     CenterCrop
     ClipTransform
     Compose
+    ConditionalPolicySwitch
     ConditionalSkip
     Crop
     DataLoadingPrimer
@@ -1417,6 +1418,7 @@ the following function will return ``1`` when queried:
     HabitatEnv
     IsaacGymEnv
     IsaacGymWrapper
+    IsaacLabWrapper
     JumanjiEnv
     JumanjiWrapper
     MeltingpotEnv
 
@@ -10,6 +10,8 @@
 import torchrl.envs
 import torchrl.modules.mcts
 from tensordict import TensorDict
+from torchrl.data import Composite, Unbounded
+from torchrl.envs import Transform
 
 pgn_or_fen = "fen"
 mask_actions = True
@@ -25,39 +27,68 @@
 )
 
 
-class TransformReward:
-    def __call__(self, td):
-        if "reward" not in td:
-            return td
+class TurnBasedChess(Transform):  # exposes per-agent ("agent0"/"agent1") turn flags and rewards
+    def transform_observation_spec(self, obsspec):  # declares a boolean scalar "turn" entry for each agent
+        obsspec["agent0", "turn"] = Unbounded(dtype=torch.bool, shape=())
+        obsspec["agent1", "turn"] = Unbounded(dtype=torch.bool, shape=())
+        return obsspec
 
-        reward = td["reward"]
+    def transform_reward_spec(self, reward_spec):  # swaps the root "reward" spec for one "reward" entry per agent
+        reward = reward_spec["reward"].clone()  # cloned before the root entry is deleted on the next line
+        del reward_spec["reward"]
+        return Composite(
+            agent0=Composite(reward=reward),
+            agent1=Composite(reward=reward),
+        )
+
+    def _reset(self, _td, td):  # agent0's flag mirrors the root "turn"; agent1's is its negation
+        td["agent0", "turn"] = td["turn"]
+        td["agent1", "turn"] = ~td["turn"]
+        return td
+
+    def _step(self, td, td_next):  # fills per-agent turn flags and rewards in td_next, then drops the root reward
+        td_next["agent0", "turn"] = td_next["turn"]
+        td_next["agent1", "turn"] = ~td_next["turn"]
+
+        reward = td_next["reward"]
+        turn = td["turn"]  # read from td rather than td_next
 
         if reward == 0.5:
             reward = 0
-        elif reward == 1 and td["turn"]:
-            reward = -reward
+        elif reward == 1:
+            if not turn:  # sign is flipped when the "turn" flag taken from td is False
+                reward = -reward
 
-        td["reward"] = reward
-        return td
+        td_next["agent0", "reward"] = reward
+        td_next["agent1", "reward"] = -reward  # agent1 always receives the negation of agent0's reward
+        del td_next["reward"]  # only the per-agent reward entries remain
+
+        return td_next
 
 
-# ChessEnv sets the reward to 0.5 for a draw and 1 for a win for either player.
-# Need to transform the reward to be:
-#   white win = 1
-#   draw = 0
-#   black win = -1
-transform_reward = TransformReward()
-env = env.append_transform(transform_reward)
+env = env.append_transform(TurnBasedChess())
+env.rollout(3)  # NOTE(review): the result is discarded — presumably a quick smoke test of the transform; confirm it should stay
 
 forest = torchrl.data.MCTSForest()
 forest.reward_keys = env.reward_keys
 forest.done_keys = env.done_keys
 forest.action_keys = env.action_keys
 
 if mask_actions:
-    forest.observation_keys = [f"{pgn_or_fen}_hash", "turn", "action_mask"]
+    forest.observation_keys = [
+        f"{pgn_or_fen}_hash",
+        "turn",
+        "action_mask",
+        ("agent0", "turn"),
+        ("agent1", "turn"),
+    ]
 else:
-    forest.observation_keys = [f"{pgn_or_fen}_hash", "turn"]
+    forest.observation_keys = [
+        f"{pgn_or_fen}_hash",
+        "turn",
+        ("agent0", "turn"),
+        ("agent1", "turn"),
+    ]
 
 
 def tree_format_fn(tree):
@@ -72,17 +103,20 @@ def tree_format_fn(tree):
 
 def get_best_move(fen, mcts_steps, rollout_steps):
     root = env.reset(TensorDict({"fen": fen}))
-    mcts = torchrl.modules.mcts.MCTS(mcts_steps, rollout_steps)
+    agent_keys = ["agent0", "agent1"]
+    mcts = torchrl.modules.mcts.MCTS(mcts_steps, rollout_steps, agent_keys=agent_keys)
     tree = mcts(forest, root, env)
     moves = []
 
     for subtree in tree.subtree:
-        san = subtree.rollout[0]["next", "san"]
-        reward_sum = subtree.wins
+        td = subtree.rollout[0]
+        san = td["next", "san"]
+        active_agent = agent_keys[
+            torch.stack([td[agent]["turn"] for agent in agent_keys]).nonzero()
+        ]
+        reward_sum = subtree.wins[active_agent, "reward"]
         visits = subtree.visits
         value_avg = (reward_sum / visits).item()
-        if not root["turn"]:
-            value_avg = -value_avg
         moves.append((value_avg, san))
 
     moves = sorted(moves, key=lambda x: -x[0])
@@ -97,7 +131,7 @@ def get_best_move(fen, mcts_steps, rollout_steps):
     return moves[0][1]
 
 
-for idx in range(30):
+for idx in range(3):
     print("==========")
     print(idx)
     print("==========")
 
@@ -128,9 +128,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):
     device = cfg.collector.device
     if device in ("", None):
         if torch.cuda.is_available():
-            if torch.cuda.device_count() < 2:
-                raise RuntimeError("Requires >= 2 GPUs")
-            device = torch.device("cuda:1")
+            device = torch.device("cuda:0")
         else:
             device = torch.device("cpu")
     collector = SyncDataCollector(
@@ -158,7 +156,9 @@ def make_collector_async(
     device = cfg.collector.device
     if device in ("", None):
         if torch.cuda.is_available():
-            device = torch.device("cuda:0")
+            if torch.cuda.device_count() < 2:
+                raise RuntimeError("Requires >= 2 GPUs")
+            device = torch.device("cuda:1")
         else:
             device = torch.device("cpu")
 
 
@@ -13,7 +13,7 @@
 from tensordict import tensorclass, TensorDict, TensorDictBase
 from tensordict.nn import TensorDictModuleBase
 from tensordict.utils import expand_right, NestedKey
-
+from torchrl._utils import logger as torchrl_logger
 from torchrl.data import (
     Binary,
     Bounded,
@@ -2533,3 +2533,58 @@ def __next__(self):
             else:
                 tokens = tensors
         return {"tokens": tokens, "attention_mask": tokens != 0}
+
+
+class MockNestedResetEnv(EnvBase):
+    """To test behaviour of envs with nested done states - where the root done prevails over others."""
+
+    def __init__(self, num_steps: int, done_at_root: bool) -> None:
+        super().__init__(device="cpu")
+        self._num_steps = num_steps  # step count at which the counter-driven done flag turns True
+        self._counter = 0  # steps taken since the last reset
+        self.done_at_root = done_at_root  # if True, a root-level "done" is exposed next to the per-agent ones
+        self.done_spec = Composite(
+            {
+                ("agent_1", "done"): Binary(1, dtype=torch.bool),
+                ("agent_2", "done"): Binary(1, dtype=torch.bool),
+            }
+        )
+        if done_at_root:
+            self.full_done_spec["done"] = Binary(1, dtype=torch.bool)
+
+    def _reset(self, tensordict: TensorDict) -> TensorDict:
+        torchrl_logger.info(f"Reset after {self._counter} steps!")
+        if tensordict is not None:  # the reset input may be None; it is only logged, never read
+            torchrl_logger.info(f"tensordict at reset {tensordict.to_dict()}")
+        self._counter = 0
+        result = TensorDict(
+            {
+                ("agent_1", "done"): torch.tensor([False], dtype=torch.bool),
+                ("agent_2", "done"): torch.tensor([False], dtype=torch.bool),
+            },
+        )
+        if self.done_at_root:
+            result["done"] = torch.tensor([False], dtype=torch.bool)
+        return result
+
+    def _step(self, tensordict: TensorDict) -> TensorDict:
+        self._counter += 1
+        done = torch.tensor([self._counter >= self._num_steps], dtype=torch.bool)
+        if self.done_at_root:  # agent_1 reports done on every step; only the root flag follows the counter
+            return TensorDict(
+                {
+                    "done": done,
+                    ("agent_1", "done"): torch.tensor([True], dtype=torch.bool),
+                    ("agent_2", "done"): torch.tensor([False], dtype=torch.bool),
+                },
+            )
+        else:  # without a root flag, agent_1 carries the counter-driven done; agent_2 never finishes
+            return TensorDict(
+                {
+                    ("agent_1", "done"): done,
+                    ("agent_2", "done"): torch.tensor([False], dtype=torch.bool),
+                },
+            )
+
+    def _set_seed(self, seed=None):
+        """No-op: the mock uses no random state; ``seed`` is accepted (and ignored) so callers may pass one."""
@@ -3419,7 +3419,7 @@ def test_collector_rb_multisync(
             assert len(rb) == pred_len
         collector.shutdown()
         assert len(rb) == 256
-        if not extend_buffer:
+        if extend_buffer:
             steps_counts = rb["step_count"].squeeze().split(16)
             collector_ids = rb["collector", "traj_ids"].squeeze().split(16)
             for step_count, ids in zip(steps_counts, collector_ids):