Skip to content

Commit b4f0a0e

Browse files
committed
Update
[ghstack-poisoned]
2 parents 73d8e56 + 8418518 commit b4f0a0e

File tree

44 files changed

+2011
-220
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+2011
-220
lines changed

.github/unittest/linux_libs/scripts_gym/setup_env.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ set -e
1010
this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
1111
# Avoid error: "fatal: unsafe repository"
1212
apt-get update && apt-get install -y git wget gcc g++
13-
1413
apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
1514
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev
1615

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env bash
#
# CI entry point for the IsaacLab tests.
#
# Steps: install system packages, bootstrap Miniconda, create a conda env with
# a pinned PyTorch, install Isaac Sim and IsaacLab, install tensordict and
# torchrl, then run the Isaac-related tests from test/test_libs.py.

# Abort on errors, unset variables and failed pipeline stages; echo every
# script line as it is read so the CI log shows what is being executed.
set -euo pipefail
set -v

# Release detection is currently disabled; the nightly settings are always used.
#if [[ "${{ github.ref }}" =~ release/* ]]; then
#  export RELEASE=1
#  export TORCH_VERSION=stable
#else
export RELEASE=0
export TORCH_VERSION=nightly
#fi

export PYTHON_VERSION="3.10"
export CU_VERSION="12.8"
export TAR_OPTIONS="--no-same-owner"
export UPLOAD_CHANNEL="nightly"
export TF_CPP_MIN_LOG_LEVEL=0
export BATCHED_PIPE_TIMEOUT=60
export TD_GET_DEFAULTS_TO_NONE=1
export OMNI_KIT_ACCEPT_EULA=yes

nvidia-smi

# Setup
apt-get update && apt-get install -y git wget gcc g++
apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev

# Avoid git's "unsafe repository" error when the checkout is owned by another user.
git config --global --add safe.directory '*'
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
env_dir="${root_dir}/env"
lib_dir="${env_dir}/lib"

cd "${root_dir}"

# install conda
printf "* Installing conda\n"
# Fetch the installer over HTTPS: it is executed right after the download.
wget -O miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
bash ./miniconda.sh -b -f -p "${conda_dir}"
eval "$("${conda_dir}/bin/conda" shell.bash hook)"


conda create --prefix "${env_dir}" python="${PYTHON_VERSION}" -y
conda activate "${env_dir}"

# Pin pytorch to 2.5.1 for IsaacLab
conda install pytorch==2.5.1 torchvision==0.20.1 pytorch-cuda=12.4 -c pytorch -c nvidia -y

conda run -p "${env_dir}" pip install --upgrade pip
conda run -p "${env_dir}" pip install 'isaacsim[all,extscache]==4.5.0' --extra-index-url https://pypi.nvidia.com
conda install conda-forge::"cmake>3.22" -y

git clone https://github.com/isaac-sim/IsaacLab.git
cd IsaacLab
conda run -p "${env_dir}" ./isaaclab.sh --install sb3
cd ../

# install tensordict
if [[ "$RELEASE" == 0 ]]; then
  # Nightly path: build tensordict from source (needs cmake and pybind11).
  conda install "anaconda::cmake>=3.22" -y
  conda run -p "${env_dir}" python3 -m pip install "pybind11[global]"
  conda run -p "${env_dir}" python3 -m pip install git+https://github.com/pytorch/tensordict.git
else
  conda run -p "${env_dir}" python3 -m pip install tensordict
fi

# smoke test
conda run -p "${env_dir}" python -c "import tensordict"

printf "* Installing torchrl\n"
conda run -p "${env_dir}" python setup.py develop
conda run -p "${env_dir}" python -c "import torchrl"

# Install pytest
conda run -p "${env_dir}" python -m pip install pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures pytest-error-for-skips pytest-asyncio

# Run tests
conda run -p "${env_dir}" python -m pytest test/test_libs.py -k isaac -s

.github/workflows/test-linux-libs.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,24 @@ jobs:
230230
./.github/unittest/linux_libs/scripts_gym/batch_scripts.sh
231231
./.github/unittest/linux_libs/scripts_gym/post_process.sh
232232
233+
unittests-isaaclab:
234+
strategy:
235+
matrix:
236+
python_version: ["3.10"]
237+
cuda_arch_version: ["12.8"]
238+
if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments/Isaac') }}
239+
uses: vmoens/test-infra/.github/workflows/isaac_linux_job_v2.yml@main
240+
with:
241+
repository: pytorch/rl
242+
runner: "linux.g5.4xlarge.nvidia.gpu"
243+
docker-image: "nvcr.io/nvidia/isaac-lab:2.1.0"
244+
test-infra-repository: vmoens/test-infra
245+
gpu-arch-type: cuda
246+
gpu-arch-version: "12.8"
247+
timeout: 120
248+
script: |
249+
./.github/unittest/linux_libs/scripts_isaaclab/isaac.sh
250+
233251
unittests-jumanji:
234252
strategy:
235253
matrix:

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ cover/
5656
*.mo
5757
*.pot
5858

59+
# Jupyter
60+
*.ipynb
61+
5962
# Django stuff:
6063
*.log
6164
local_settings.py

docs/source/reference/envs.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,7 @@ to be able to create this other composition:
11121112
CenterCrop
11131113
ClipTransform
11141114
Compose
1115+
ConditionalPolicySwitch
11151116
ConditionalSkip
11161117
Crop
11171118
DataLoadingPrimer
@@ -1417,6 +1418,7 @@ the following function will return ``1`` when queried:
14171418
HabitatEnv
14181419
IsaacGymEnv
14191420
IsaacGymWrapper
1421+
IsaacLabWrapper
14201422
JumanjiEnv
14211423
JumanjiWrapper
14221424
MeltingpotEnv

examples/trees/mcts.py

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import torchrl.envs
1111
import torchrl.modules.mcts
1212
from tensordict import TensorDict
13+
from torchrl.data import Composite, Unbounded
14+
from torchrl.envs import Transform
1315

1416
pgn_or_fen = "fen"
1517
mask_actions = True
@@ -25,39 +27,68 @@
2527
)
2628

2729

28-
class TransformReward:
29-
def __call__(self, td):
30-
if "reward" not in td:
31-
return td
30+
class TurnBasedChess(Transform):
    """Expose the env's turn and reward under per-agent keys.

    Adds boolean ``("agent0", "turn")`` and ``("agent1", "turn")`` entries
    derived from the root ``"turn"`` entry, and replaces the root ``"reward"``
    entry with ``("agent0", "reward")`` and its negation
    ``("agent1", "reward")``.
    """

    def transform_observation_spec(self, obsspec):
        """Register a scalar boolean ``turn`` entry for each agent."""
        for agent in ("agent0", "agent1"):
            obsspec[agent, "turn"] = Unbounded(dtype=torch.bool, shape=())
        return obsspec

    def transform_reward_spec(self, reward_spec):
        """Move the root ``reward`` spec under ``agent0`` and ``agent1``."""
        per_agent_reward = reward_spec["reward"].clone()
        del reward_spec["reward"]
        # Both agents receive the same cloned spec instance.
        agent_specs = {
            agent: Composite(reward=per_agent_reward)
            for agent in ("agent0", "agent1")
        }
        return Composite(**agent_specs)

    def _reset(self, _td, td):
        """Write the per-agent turn flags into the reset output."""
        root_turn = td["turn"]
        td["agent0", "turn"] = root_turn
        td["agent1", "turn"] = ~root_turn
        return td

    def _step(self, td, td_next):
        """Write per-agent turn flags and rewards into the step output.

        The root reward is remapped before being split: ``0.5`` becomes ``0``,
        and ``1`` is negated when the pre-step ``turn`` flag is falsy. The
        result goes to ``agent0``, its negation to ``agent1``, and the root
        ``"reward"`` entry is removed.
        """
        next_turn = td_next["turn"]
        td_next["agent0", "turn"] = next_turn
        td_next["agent1", "turn"] = ~next_turn

        agent0_reward = td_next["reward"]
        # The turn flag is read from the pre-step tensordict, not ``td_next``.
        prev_turn = td["turn"]

        if agent0_reward == 0.5:
            agent0_reward = 0
        elif agent0_reward == 1 and not prev_turn:
            agent0_reward = -agent0_reward

        td_next["agent0", "reward"] = agent0_reward
        td_next["agent1", "reward"] = -agent0_reward
        del td_next["reward"]

        return td_next
4267

4368

44-
# ChessEnv sets the reward to 0.5 for a draw and 1 for a win for either player.
45-
# Need to transform the reward to be:
46-
# white win = 1
47-
# draw = 0
48-
# black win = -1
49-
transform_reward = TransformReward()
50-
env = env.append_transform(transform_reward)
69+
env = env.append_transform(TurnBasedChess())
70+
env.rollout(3)
5171

5272
forest = torchrl.data.MCTSForest()
5373
forest.reward_keys = env.reward_keys
5474
forest.done_keys = env.done_keys
5575
forest.action_keys = env.action_keys
5676

5777
if mask_actions:
58-
forest.observation_keys = [f"{pgn_or_fen}_hash", "turn", "action_mask"]
78+
forest.observation_keys = [
79+
f"{pgn_or_fen}_hash",
80+
"turn",
81+
"action_mask",
82+
("agent0", "turn"),
83+
("agent1", "turn"),
84+
]
5985
else:
60-
forest.observation_keys = [f"{pgn_or_fen}_hash", "turn"]
86+
forest.observation_keys = [
87+
f"{pgn_or_fen}_hash",
88+
"turn",
89+
("agent0", "turn"),
90+
("agent1", "turn"),
91+
]
6192

6293

6394
def tree_format_fn(tree):
@@ -72,17 +103,20 @@ def tree_format_fn(tree):
72103

73104
def get_best_move(fen, mcts_steps, rollout_steps):
74105
root = env.reset(TensorDict({"fen": fen}))
75-
mcts = torchrl.modules.mcts.MCTS(mcts_steps, rollout_steps)
106+
agent_keys = ["agent0", "agent1"]
107+
mcts = torchrl.modules.mcts.MCTS(mcts_steps, rollout_steps, agent_keys=agent_keys)
76108
tree = mcts(forest, root, env)
77109
moves = []
78110

79111
for subtree in tree.subtree:
80-
san = subtree.rollout[0]["next", "san"]
81-
reward_sum = subtree.wins
112+
td = subtree.rollout[0]
113+
san = td["next", "san"]
114+
active_agent = agent_keys[
115+
torch.stack([td[agent]["turn"] for agent in agent_keys]).nonzero()
116+
]
117+
reward_sum = subtree.wins[active_agent, "reward"]
82118
visits = subtree.visits
83119
value_avg = (reward_sum / visits).item()
84-
if not root["turn"]:
85-
value_avg = -value_avg
86120
moves.append((value_avg, san))
87121

88122
moves = sorted(moves, key=lambda x: -x[0])
@@ -97,7 +131,7 @@ def get_best_move(fen, mcts_steps, rollout_steps):
97131
return moves[0][1]
98132

99133

100-
for idx in range(30):
134+
for idx in range(3):
101135
print("==========")
102136
print(idx)
103137
print("==========")

sota-implementations/sac/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):
128128
device = cfg.collector.device
129129
if device in ("", None):
130130
if torch.cuda.is_available():
131-
if torch.cuda.device_count() < 2:
132-
raise RuntimeError("Requires >= 2 GPUs")
133-
device = torch.device("cuda:1")
131+
device = torch.device("cuda:0")
134132
else:
135133
device = torch.device("cpu")
136134
collector = SyncDataCollector(
@@ -158,7 +156,9 @@ def make_collector_async(
158156
device = cfg.collector.device
159157
if device in ("", None):
160158
if torch.cuda.is_available():
161-
device = torch.device("cuda:0")
159+
if torch.cuda.device_count() < 2:
160+
raise RuntimeError("Requires >= 2 GPUs")
161+
device = torch.device("cuda:1")
162162
else:
163163
device = torch.device("cpu")
164164

test/mocking_classes.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from tensordict import tensorclass, TensorDict, TensorDictBase
1414
from tensordict.nn import TensorDictModuleBase
1515
from tensordict.utils import expand_right, NestedKey
16-
16+
from torchrl._utils import logger as torchrl_logger
1717
from torchrl.data import (
1818
Binary,
1919
Bounded,
@@ -2533,3 +2533,58 @@ def __next__(self):
25332533
else:
25342534
tokens = tensors
25352535
return {"tokens": tokens, "attention_mask": tokens != 0}
2536+
2537+
2538+
class MockNestedResetEnv(EnvBase):
    """To test behaviour of envs with nested done states - where the root done prevails over others.

    Args:
        num_steps: number of steps after which the counter-driven done flag
            becomes ``True``.
        done_at_root: if ``True``, a root-level ``"done"`` entry is added to
            the done spec and carries the counter-driven flag; otherwise that
            flag is written to ``("agent_1", "done")``.
    """

    def __init__(self, num_steps: int, done_at_root: bool) -> None:
        super().__init__(device="cpu")
        self._num_steps = num_steps
        self._counter = 0
        self.done_at_root = done_at_root
        self.done_spec = Composite(
            {
                ("agent_1", "done"): Binary(1, dtype=torch.bool),
                ("agent_2", "done"): Binary(1, dtype=torch.bool),
            }
        )
        if done_at_root:
            self.full_done_spec["done"] = Binary(1, dtype=torch.bool)

    def _reset(self, tensordict: TensorDict) -> TensorDict:
        """Reset the step counter and return all done flags set to ``False``.

        ``tensordict`` may be ``None``; when provided it is only logged.
        """
        torchrl_logger.info(f"Reset after {self._counter} steps!")
        if tensordict is not None:
            torchrl_logger.info(f"tensordict at reset {tensordict.to_dict()}")
        self._counter = 0
        result = TensorDict(
            {
                ("agent_1", "done"): torch.tensor([False], dtype=torch.bool),
                ("agent_2", "done"): torch.tensor([False], dtype=torch.bool),
            },
        )
        if self.done_at_root:
            result["done"] = torch.tensor([False], dtype=torch.bool)
        return result

    def _step(self, tensordict: TensorDict) -> TensorDict:
        """Advance the counter and emit the done flags for this step."""
        self._counter += 1
        done = torch.tensor([self._counter >= self._num_steps], dtype=torch.bool)
        if self.done_at_root:
            # agent_1 reports done on every step here, so only the root flag
            # reflects the step counter.
            return TensorDict(
                {
                    "done": done,
                    ("agent_1", "done"): torch.tensor([True], dtype=torch.bool),
                    ("agent_2", "done"): torch.tensor([False], dtype=torch.bool),
                },
            )
        else:
            return TensorDict(
                {
                    ("agent_1", "done"): done,
                    ("agent_2", "done"): torch.tensor([False], dtype=torch.bool),
                },
            )

    def _set_seed(self, seed=None) -> None:
        """No-op: this mock has no randomness to seed.

        ``seed`` is accepted (and ignored) so the method can be called with a
        seed argument; it defaults to ``None`` so existing zero-argument calls
        keep working.
        """

test/test_collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3419,7 +3419,7 @@ def test_collector_rb_multisync(
34193419
assert len(rb) == pred_len
34203420
collector.shutdown()
34213421
assert len(rb) == 256
3422-
if not extend_buffer:
3422+
if extend_buffer:
34233423
steps_counts = rb["step_count"].squeeze().split(16)
34243424
collector_ids = rb["collector", "traj_ids"].squeeze().split(16)
34253425
for step_count, ids in zip(steps_counts, collector_ids):

0 commit comments

Comments
 (0)