Cherry-pick: Fix TorchRL scripts for PyTorch 2.6 and TorchRL>=0.6 (#3199) (#3266)

svekars · vmoens · web-flow · commit 8604b4be1e04 · 2025-01-27T14:27:52.000-08:00
Fixes #3195. Fixes the TorchRL scripts for the PyTorch 2.6 release. Co-authored-by: Vincent Moens <vmoens@meta.com>
diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
@@ -51,7 +51,6 @@
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
     "intermediate_source/torch_export_tutorial", # reenable after 2940 is fixed.
-    "advanced_source/pendulum",
 ]
 
 def tutorial_source_dirs() -> List[Path]:
diff --git a/advanced_source/coding_ddpg.py b/advanced_source/coding_ddpg.py
@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj
diff --git a/advanced_source/pendulum.py b/advanced_source/pendulum.py
@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
     env,
     # ``Unsqueeze`` the observations that we will concatenate
     UnsqueezeTransform(
-        unsqueeze_dim=-1,
+        dim=-1,
         in_keys=["th", "thdot"],
         in_keys_inv=["th", "thdot"],
     ),
diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py
@@ -433,7 +433,7 @@
     exploration_module.step(data.numel())
     updater.step()
 
-    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
+    with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
         rollout = env.rollout(10000, stoch_policy)
         traj_lens.append(rollout.get(("next", "step_count")).max().item())
 
diff --git a/intermediate_source/reinforcement_ppo.py b/intermediate_source/reinforcement_ppo.py
@@ -419,8 +419,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
     # we'll need the log-prob for the numerator of the importance weights

Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,6 @@`
`51`	`51`	`"intermediate_source/text_to_speech_with_torchaudio",`
`52`	`52`	`"intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.`
`53`	`53`	`"intermediate_source/torch_export_tutorial", # reenable after 2940 is fixed.`
`54`		`- "advanced_source/pendulum",`
`55`	`54`	`]`
`56`	`55`
`57`	`56`	`def tutorial_source_dirs() -> List[Path]:`
Original file line number	Diff line number	Diff line change
`@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):`
`893`	`893`	`record_frames=1000,`
`894`	`894`	`policy_exploration=actor_model_explore,`
`895`	`895`	`environment=environment,`
`896`		`- exploration_type=ExplorationType.MEAN,`
	`896`	`+ exploration_type=ExplorationType.DETERMINISTIC,`
`897`	`897`	`record_interval=record_interval,`
`898`	`898`	`)`
`899`	`899`	`return recorder_obj`