Update

Vincent Moens · Vincent Moens · commit 8b4c97f42d85 · 2025-04-09T14:50:31.000+01:00
[ghstack-poisoned]
diff --git a/test/test_libs.py b/test/test_libs.py
@@ -1424,7 +1424,6 @@ def step(self, action):
 
             return CustomEnv(**kwargs)
 
-    @pytest.fixture(scope="function")
     def counting_env(self):
         import gymnasium as gym
         from gymnasium import Env
@@ -1482,9 +1481,10 @@ def test_gymnasium_autoreset(self, venv):  # noqa
 
     @implement_for("gymnasium", "1.1.0")
     @pytest.mark.parametrize("venv", ["sync", "async"])
-    def test_gymnasium_autoreset(self, venv, counting_env):  # noqa
+    def test_gymnasium_autoreset(self, venv):  # noqa
         import gymnasium as gym
 
+        counting_env = self.counting_env()
         if venv == "sync":
             venv = gym.vector.SyncVectorEnv
         else:
@@ -1516,8 +1516,25 @@ def test_gymnasium_autoreset(self, venv, counting_env):  # noqa
         torch.testing.assert_close(r0["next", "observation"], r1["next", "observation"])
         torch.testing.assert_close(r0["next", "done"], r1["next", "done"])
 
+    @implement_for("gym")
+    def test_resetting_strategies(self):
+        return
+
+    @implement_for("gymnasium", None, "1.0.0")
     @pytest.mark.parametrize("heterogeneous", [False, True])
-    def test_resetting_strategies(self, heterogeneous):
+    def test_resetting_strategies(self, heterogeneous):  # noqa
+        self._test_resetting_strategies(heterogeneous, {})
+
+    @implement_for("gymnasium", "1.1.0")
+    @pytest.mark.parametrize("heterogeneous", [False, True])
+    def test_resetting_strategies(self, heterogeneous):  # noqa
+        import gymnasium as gym
+
+        self._test_resetting_strategies(
+            heterogeneous, {"autoreset_mode": gym.vector.AutoresetMode.SAME_STEP}
+        )
+
+    def _test_resetting_strategies(self, heterogeneous, kwargs):
         if _has_gymnasium:
             backend = "gymnasium"
         else:
@@ -1533,7 +1550,8 @@ def test_resetting_strategies(self, heterogeneous):
                 env = GymWrapper(
                     gym_backend().vector.AsyncVectorEnv(
                         [functools.partial(self._get_dummy_gym_env, backend=backend)]
-                        * 4
+                        * 4,
+                        **kwargs,
                     )
                 )
             else:
@@ -1546,7 +1564,8 @@ def test_resetting_strategies(self, heterogeneous):
                                 backend=backend,
                             )
                             for i in range(4)
-                        ]
+                        ],
+                        **kwargs,
                     )
                 )
             try:
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -2214,7 +2214,7 @@ def register_gym(
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2276,12 +2276,12 @@ def register_gym(
                 enforcer wrapper should be applied to ensure users run functions
                 in the correct order.
                 Defaults to ``True``.
-            autoreset (bool, optional): [Gym >= 0.14] Whether the autoreset wrapper
+            autoreset (bool, optional): [Gym >= 0.14 and <1.0.0] Whether the autoreset wrapper
                 should be added such that reset does not need to be called.
-                Defaults to ``False``.
+                Defaults to ``None`` (the wrapper is not added).
             disable_env_checker: [Gym >= 0.14] Whether the environment
                 checker should be disabled for the environment. Defaults to ``False``.
-            apply_api_compatibility: [Gym >= 0.26] If to apply the `StepAPICompatibility` wrapper.
+            apply_api_compatibility: [Gym >= 0.26 and <1.0.0] Whether to apply the `StepAPICompatibility` wrapper.
                 Defaults to ``False``.
             **kwargs: arbitrary keyword arguments which are passed to the environment constructor.
 
@@ -2403,7 +2403,7 @@ def _register_gym(
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2428,7 +2428,7 @@ def _register_gym(
             nondeterministic=nondeterministic,
             max_episode_steps=max_episode_steps,
             order_enforce=order_enforce,
-            autoreset=autoreset,
+            autoreset=bool(autoreset),
             disable_env_checker=disable_env_checker,
             apply_api_compatibility=apply_api_compatibility,
         )
@@ -2445,7 +2445,7 @@ def _register_gym(  # noqa: F811
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2477,7 +2477,7 @@ def _register_gym(  # noqa: F811
             nondeterministic=nondeterministic,
             max_episode_steps=max_episode_steps,
             order_enforce=order_enforce,
-            autoreset=autoreset,
+            autoreset=bool(autoreset),
             disable_env_checker=disable_env_checker,
         )
 
@@ -2493,7 +2493,7 @@ def _register_gym(  # noqa: F811
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2531,7 +2531,7 @@ def _register_gym(  # noqa: F811
             nondeterministic=nondeterministic,
             max_episode_steps=max_episode_steps,
             order_enforce=order_enforce,
-            autoreset=autoreset,
+            autoreset=bool(autoreset),
         )
 
     @implement_for("gym", "0.21", "0.24", class_method=True)
@@ -2546,7 +2546,7 @@ def _register_gym(  # noqa: F811
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2565,7 +2565,7 @@ def _register_gym(  # noqa: F811
                     "disable_env_checker", gym.__version__
                 )
             )
-        if autoreset is not False:
+        if autoreset is not None:
             raise TypeError(
                 cls._GYM_UNRECOGNIZED_KWARG.format("autoreset", gym.__version__)
             )
@@ -2602,7 +2602,7 @@ def _register_gym(  # noqa: F811
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2620,7 +2620,7 @@ def _register_gym(  # noqa: F811
                     "disable_env_checker", gym.__version__
                 )
             )
-        if autoreset is not False:
+        if autoreset is not None:
             raise TypeError(
                 cls._GYM_UNRECOGNIZED_KWARG.format("autoreset", gym.__version__)
             )
@@ -2648,7 +2648,7 @@ def _register_gym(  # noqa: F811
             max_episode_steps=max_episode_steps,
         )
 
-    @implement_for("gymnasium", class_method=True)
+    @implement_for("gymnasium", None, "1.0.0", class_method=True)
     def _register_gym(  # noqa: F811
         cls,
         id,
@@ -2660,7 +2660,7 @@ def _register_gym(  # noqa: F811
         nondeterministic: bool = False,
         max_episode_steps: int | None = None,
         order_enforce: bool = True,
-        autoreset: bool = False,
+        autoreset: bool | None = None,
         disable_env_checker: bool = False,
         apply_api_compatibility: bool = False,
         **kwargs,
@@ -2686,11 +2686,62 @@ def _register_gym(  # noqa: F811
             nondeterministic=nondeterministic,
             max_episode_steps=max_episode_steps,
             order_enforce=order_enforce,
-            autoreset=autoreset,
+            autoreset=bool(autoreset),
             disable_env_checker=disable_env_checker,
             apply_api_compatibility=apply_api_compatibility,
         )
 
+    @implement_for("gymnasium", "1.1.0", class_method=True)
+    def _register_gym(  # noqa: F811
+        cls,
+        id,
+        entry_point: Callable | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
+        to_numpy: bool = False,
+        reward_threshold: float | None = None,
+        nondeterministic: bool = False,
+        max_episode_steps: int | None = None,
+        order_enforce: bool = True,
+        autoreset: bool | None = None,
+        disable_env_checker: bool = False,
+        apply_api_compatibility: bool = False,
+        **kwargs,
+    ):
+        import gymnasium
+        from torchrl.envs.libs._gym_utils import _TorchRLGymnasiumWrapper
+
+        if autoreset is not None:
+            raise TypeError(
+                f"the autoreset argument is deprecated in gymnasium>=1.0. Got autoreset={autoreset}"
+            )
+        if entry_point is None:
+            entry_point = cls
+
+        entry_point = partial(
+            _TorchRLGymnasiumWrapper,
+            entry_point=entry_point,
+            info_keys=info_keys,
+            to_numpy=to_numpy,
+            transform=transform,
+            **kwargs,
+        )
+        if apply_api_compatibility is not False:
+            raise TypeError(
+                cls._GYM_UNRECOGNIZED_KWARG.format(
+                    "apply_api_compatibility", gymnasium.__version__
+                )
+            )
+        return gymnasium.register(
+            id=id,
+            entry_point=entry_point,
+            reward_threshold=reward_threshold,
+            nondeterministic=nondeterministic,
+            max_episode_steps=max_episode_steps,
+            order_enforce=order_enforce,
+            disable_env_checker=disable_env_checker,
+        )
+
     def forward(self, *args, **kwargs):
         raise NotImplementedError(
             "EnvBase.forward is not implemented. If you ended here during a call to `ParallelEnv(...)`, please use "
diff --git a/torchrl/envs/libs/_gym_utils.py b/torchrl/envs/libs/_gym_utils.py
@@ -125,7 +125,7 @@ def _action_keys(self):
     import gymnasium
 
     class _TorchRLGymnasiumWrapper(gymnasium.Env, _BaseGymWrapper):
-        @implement_for("gymnasium", "1.0.0")
+        @implement_for("gymnasium", "1.0.0", "1.1.0")
         def step(self, action):  # noqa: F811
             raise ImportError(GYMNASIUM_1_ERROR)
 
@@ -157,9 +157,43 @@ def step(self, action):  # noqa: F811
                 out = tree_map(lambda x: x.detach().cpu().numpy(), out)
             return out
 
+        @implement_for("gymnasium", "1.1.0")
+        def step(self, action):  # noqa: F811
+            action_keys = self._action_keys
+            if len(action_keys) == 1:
+                self._tensordict.set(action_keys[0], action)
+            else:
+                raise RuntimeError(
+                    "Wrapping environments with more than one action key is not supported yet."
+                )
+            self.torchrl_env.step(self._tensordict)
+            _tensordict = step_mdp(self._tensordict)
+            observation = self._tensordict.get("next")
+            if self.info_keys:
+                info = observation.select(*self.info_keys).to_dict()
+            else:
+                info = {}
+            observation = observation.select(*self._observation_keys).to_dict()
+            reward = self._tensordict.get(("next", "reward"))
+            terminated = self._tensordict.get(("next", "terminated"))
+            truncated = self._tensordict.get(
+                ("next", "truncated"), torch.zeros_like(terminated)
+            )
+            self._tensordict = _tensordict.select(*self._input_keys)
+            out = (observation, reward, terminated, truncated, info)
+            if self.to_numpy:
+                out = tree_map(lambda x: x.detach().cpu().numpy(), out)
+            return out
+
         @implement_for("gymnasium", None, "1.0.0")
-        def reset(self):  # noqa: F811
-            self._tensordict = self.torchrl_env.reset()
+        def reset(  # noqa: F811
+            self, seed: int | None = None, options: dict | None = None
+        ):
+            if seed is not None:
+                self.torchrl_env.set_seed(seed)
+            if options is None:
+                options = {}
+            self._tensordict = self.torchrl_env.reset(**options)
             observation = self._tensordict
             if self.info_keys:
                 info = observation.select(*self.info_keys).to_dict()
@@ -171,10 +205,30 @@ def reset(self):  # noqa: F811
                 out = tree_map(lambda x: x.detach().cpu().numpy(), out)
             return out
 
-        @implement_for("gymnasium", "1.0.0")
+        @implement_for("gymnasium", "1.0.0", "1.1.0")
         def reset(self):  # noqa: F811
             raise ImportError(GYMNASIUM_1_ERROR)
 
+        @implement_for("gymnasium", "1.1.0")
+        def reset(  # noqa: F811
+            self, seed: int | None = None, options: dict | None = None
+        ):
+            if seed is not None:
+                self.torchrl_env.set_seed(seed)
+            if options is None:
+                options = {}
+            self._tensordict = self.torchrl_env.reset(**options)
+            observation = self._tensordict
+            if self.info_keys:
+                info = observation.select(*self.info_keys).to_dict()
+            else:
+                info = {}
+            observation = observation.select(*self._observation_keys).to_dict()
+            out = observation, info
+            if self.to_numpy:
+                out = tree_map(lambda x: x.detach().cpu().numpy(), out)
+            return out
+
 else:
 
     class _TorchRLGymnasiumWrapper:
diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py
@@ -1033,14 +1033,14 @@ def __init__(self, env=None, categorical_action_encoding=False, **kwargs):
 
     @implement_for("gymnasium", "1.1.0")
     def _validate_env(self, env):
-        auto_reset_mode = getattr(env, "auto_reset_mode", None)
-        if auto_reset_mode is not None:
-            from gymnasium import AutoResetMode
+        autoreset_mode = getattr(env, "autoreset_mode", None)
+        if autoreset_mode is not None:
+            from gymnasium.vector import AutoresetMode
 
-            if auto_reset_mode not in (AutoResetMode.DISABLED, AutoResetMode.SAME_STEP):
+            if autoreset_mode not in (AutoresetMode.DISABLED, AutoresetMode.SAME_STEP):
                 raise RuntimeError(
                     "The auto-reset mode must be one of SAME_STEP or DISABLED (which is preferred). Got "
-                    f"auto_reset_mode={auto_reset_mode}."
+                    f"autoreset_mode={autoreset_mode}."
                 )
 
     @implement_for("gym", None, "1.1.0")