From 4d1abb71de874d3ddfe1702b5ba299a116769952 Mon Sep 17 00:00:00 2001 From: Advait Jayant Date: Fri, 12 Jun 2026 00:33:31 +0100 Subject: [PATCH 1/2] Clarify Factory vs FORGE action-space control semantics The Factory and FORGE environments share the CtrlCfg fields pos/rot_action_bounds and pos/rot_action_threshold, but use them in opposite roles: Factory actions are displacements relative to the current end-effector pose (threshold scales the per-step action, bounds clip the target relative to the fixed asset), while FORGE actions are absolute targets relative to the fixed asset (bounds map the action onto the operational volume, the randomized threshold clips the per-step motion). This mirrors the action scale (lambda) defined in Sec. III-B, Eq. 6 of the FORGE paper and its Appendix A randomization ranges, but has repeatedly been read as an accidental swap when comparing the two environments side by side. Document both semantics on CtrlCfg, ForgeCtrlCfg and ForgeEnv._apply_action with references to the paper. Related to #5424 Signed-off-by: Advait Jayant --- .../0xadvait-clarify-forge-ctrl-semantics.rst | 8 ++++++++ .../contrib/factory/factory_env_cfg.py | 12 ++++++++++++ .../isaaclab_tasks/contrib/forge/forge_env.py | 17 ++++++++++++++++- .../contrib/forge/forge_env_cfg.py | 19 +++++++++++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst diff --git a/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst b/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst new file mode 100644 index 000000000000..c461b9d0a165 --- /dev/null +++ b/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst @@ -0,0 +1,8 @@ +Changed +^^^^^^^ + +* Clarified the documentation of the action-space semantics in the Factory and FORGE control + configurations. 
In the Factory environments, the action thresholds scale per-step end-effector + displacements and the action bounds clip the target relative to the fixed asset, while in the + FORGE environments the action bounds map actions onto the operational volume around the fixed + asset and the randomized action thresholds clip the per-step motion, following the FORGE paper. diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py index a250380e2566..43286d5e6bcc 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py @@ -50,6 +50,18 @@ class ObsRandCfg: @configclass class CtrlCfg: + """Controller and action-space configuration for the Factory environments. + + In the Factory environments, policy actions are interpreted as displacements relative to the + current end-effector pose: ``pos_action_threshold`` [m] and ``rot_action_threshold`` [rad] scale + the normalized policy action to a per-step target displacement, while ``pos_action_bounds`` [m] + clips the resulting position target relative to the fixed asset to bound the workspace. + + The FORGE environments reuse this configuration with a different action space (absolute targets + relative to the fixed asset), in which these parameters play different roles. See ``ForgeCtrlCfg`` + in ``isaaclab_tasks/contrib/forge/forge_env_cfg.py``. 
+ """ + ema_factor = 0.2 pos_action_bounds = [0.05, 0.05, 0.05] diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py index 5dda4205256f..d50855f51662 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py @@ -150,7 +150,22 @@ def _get_observations(self): return {"policy": obs_tensors, "critic": state_tensors} def _apply_action(self): - """FORGE actions are defined as targets relative to the fixed asset.""" + """FORGE actions are defined as targets relative to the fixed asset. + + Unlike the Factory environments, where actions are interpreted as displacements relative to the + current end-effector pose, FORGE actions encode absolute pose targets relative to the fixed asset. + As a result, the control parameters play different roles in the two environment families: + + * ``pos_action_bounds`` and ``rot_action_bounds`` map the normalized policy action onto the + operational volume around the fixed asset. + * ``pos_threshold`` and ``rot_threshold`` clip the per-step motion of the target relative to the + current end-effector pose. They correspond to the action scale (lambda) in the FORGE paper, + which is randomized per episode as part of the dynamics randomization scheme and exposed to + the critic as privileged state. + + Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich + Manipulation under Uncertainty", Sec. III-B, Eq. 6. 
https://arxiv.org/abs/2408.04587 + """ if self.last_update_timestamp < self._robot._data._sim_timestamp: self._compute_intermediate_values(dt=self.physics_dt) diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py index 1fb870f4191b..daa89853e2e3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py @@ -26,6 +26,25 @@ @configclass class ForgeCtrlCfg(CtrlCfg): + """Controller and action-space configuration for the FORGE environments. + + In the FORGE environments, policy actions encode absolute pose targets relative to the fixed + asset: ``pos_action_bounds`` [m] and ``rot_action_bounds`` [rad] map the normalized policy action + onto the operational volume around the fixed asset, while ``pos_action_threshold`` [m] and + ``rot_action_threshold`` [rad] clip the per-step motion of the target relative to the current + end-effector pose. This differs from the Factory environments, where actions are displacements + relative to the current end-effector pose and the same parameters play the opposite roles. + + The per-step clips correspond to the action scale (lambda) in the FORGE paper (Sec. III-B, Eq. 6). + They are randomized per episode via ``pos_threshold_noise_level`` and ``rot_threshold_noise_level`` + as part of the dynamics randomization scheme and are exposed to the critic as privileged state. + With the default values, the randomized position action scale spans [1.6, 2.5] cm, matching the + randomization parameters in Appendix A of the paper. + + Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich + Manipulation under Uncertainty". 
https://arxiv.org/abs/2408.04587 + """ + ema_factor_range = [0.025, 0.1] default_task_prop_gains = [565.0, 565.0, 565.0, 28.0, 28.0, 28.0] task_prop_gains_noise_level = [0.41, 0.41, 0.41, 0.41, 0.41, 0.41] From 33cd34b5c7925ab3eea2149a289a8f8cf065f6d8 Mon Sep 17 00:00:00 2001 From: Advait Jayant Date: Fri, 12 Jun 2026 00:56:04 +0100 Subject: [PATCH 2/2] Use config field names in ForgeEnv action docstring Reference pos_action_threshold/rot_action_threshold consistently with the ForgeCtrlCfg docstring, and note the runtime tensors they are applied through. Signed-off-by: Advait Jayant --- .../isaaclab_tasks/contrib/forge/forge_env.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py index d50855f51662..7d7090aa8e10 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py @@ -158,10 +158,11 @@ def _apply_action(self): * ``pos_action_bounds`` and ``rot_action_bounds`` map the normalized policy action onto the operational volume around the fixed asset. - * ``pos_threshold`` and ``rot_threshold`` clip the per-step motion of the target relative to the - current end-effector pose. They correspond to the action scale (lambda) in the FORGE paper, - which is randomized per episode as part of the dynamics randomization scheme and exposed to - the critic as privileged state. + * ``pos_action_threshold`` and ``rot_action_threshold`` clip the per-step motion of the target + relative to the current end-effector pose. They are applied through the per-environment + ``pos_threshold`` and ``rot_threshold`` tensors and correspond to the action scale (lambda) + in the FORGE paper; this scale is randomized per episode as part of the dynamics randomization + scheme and exposed to the critic as privileged state. 
Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich Manipulation under Uncertainty", Sec. III-B, Eq. 6. https://arxiv.org/abs/2408.04587