diff --git a/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst b/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst new file mode 100644 index 000000000000..c461b9d0a165 --- /dev/null +++ b/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst @@ -0,0 +1,8 @@ +Changed +^^^^^^^ + +* Clarified the documentation of the action-space semantics in the Factory and FORGE control + configurations. In the Factory environments, the action thresholds scale per-step end-effector + displacements and the action bounds clip the target relative to the fixed asset, while in the + FORGE environments the action bounds map actions onto the operational volume around the fixed + asset and the randomized action thresholds clip the per-step motion, following the FORGE paper. diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py index a250380e2566..43286d5e6bcc 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py @@ -50,6 +50,18 @@ class ObsRandCfg: @configclass class CtrlCfg: + """Controller and action-space configuration for the Factory environments. + + In the Factory environments, policy actions are interpreted as displacements relative to the + current end-effector pose: ``pos_action_threshold`` [m] and ``rot_action_threshold`` [rad] scale + the normalized policy action to a per-step target displacement, while ``pos_action_bounds`` [m] + clips the resulting position target relative to the fixed asset to bound the workspace. + + The FORGE environments reuse this configuration with a different action space (absolute targets + relative to the fixed asset), in which these parameters play different roles. See ``ForgeCtrlCfg`` + in ``isaaclab_tasks/contrib/forge/forge_env_cfg.py``. 
+ """ + ema_factor = 0.2 pos_action_bounds = [0.05, 0.05, 0.05] diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py index 5dda4205256f..7d7090aa8e10 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py @@ -150,7 +150,23 @@ def _get_observations(self): return {"policy": obs_tensors, "critic": state_tensors} def _apply_action(self): - """FORGE actions are defined as targets relative to the fixed asset.""" + """FORGE actions are defined as targets relative to the fixed asset. + + Unlike the Factory environments, where actions are interpreted as displacements relative to the + current end-effector pose, FORGE actions encode absolute pose targets relative to the fixed asset. + As a result, the control parameters play different roles in the two environment families: + + * ``pos_action_bounds`` and ``rot_action_bounds`` map the normalized policy action onto the + operational volume around the fixed asset. + * ``pos_action_threshold`` and ``rot_action_threshold`` clip the per-step motion of the target + relative to the current end-effector pose. They are applied through the per-environment + ``pos_threshold`` and ``rot_threshold`` tensors and correspond to the action scale (lambda) + in the FORGE paper, which is randomized per episode as part of the dynamics randomization + scheme and exposed to the critic as privileged state. + + Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich + Manipulation under Uncertainty", Sec. III-B, Eq. 6. 
https://arxiv.org/abs/2408.04587 + """ if self.last_update_timestamp < self._robot._data._sim_timestamp: self._compute_intermediate_values(dt=self.physics_dt) diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py index 1fb870f4191b..daa89853e2e3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py @@ -26,6 +26,25 @@ @configclass class ForgeCtrlCfg(CtrlCfg): + """Controller and action-space configuration for the FORGE environments. + + In the FORGE environments, policy actions encode absolute pose targets relative to the fixed + asset: ``pos_action_bounds`` [m] and ``rot_action_bounds`` [rad] map the normalized policy action + onto the operational volume around the fixed asset, while ``pos_action_threshold`` [m] and + ``rot_action_threshold`` [rad] clip the per-step motion of the target relative to the current + end-effector pose. This differs from the Factory environments, where actions are displacements + relative to the current end-effector pose, the thresholds scale them, and the bounds clip the target. + + The per-step clips correspond to the action scale (lambda) in the FORGE paper (Sec. III-B, Eq. 6). + They are randomized per episode via ``pos_threshold_noise_level`` and ``rot_threshold_noise_level`` + as part of the dynamics randomization scheme and are exposed to the critic as privileged state. + With the default values, the randomized position action scale spans [1.6, 2.5] cm, matching the + randomization parameters in Appendix A of the paper. + + Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich + Manipulation under Uncertainty". https://arxiv.org/abs/2408.04587 + """ + ema_factor_range = [0.025, 0.1] default_task_prop_gains = [565.0, 565.0, 565.0, 28.0, 28.0, 28.0] task_prop_gains_noise_level = [0.41, 0.41, 0.41, 0.41, 0.41, 0.41]