isaac-sim · 0xadvait · Jun 11, 2026 · Jun 11, 2026
diff --git a/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst b/source/isaaclab_tasks/changelog.d/0xadvait-clarify-forge-ctrl-semantics.rst
@@ -0,0 +1,8 @@
+Changed
+^^^^^^^
+
+* Clarified the documentation of the action-space semantics in the Factory and FORGE control
+  configurations. In the Factory environments, the action thresholds scale per-step end-effector
+  displacements and the action bounds clip the target relative to the fixed asset, while in the
+  FORGE environments the action bounds map actions onto the operational volume around the fixed
+  asset and the randomized action thresholds clip the per-step motion, following the FORGE paper.
diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/factory/factory_env_cfg.py
@@ -50,6 +50,18 @@ class ObsRandCfg:
 
 @configclass
 class CtrlCfg:
+    """Controller and action-space configuration for the Factory environments.
+
+    In the Factory environments, policy actions are interpreted as displacements relative to the
+    current end-effector pose: ``pos_action_threshold`` [m] and ``rot_action_threshold`` [rad] scale
+    the normalized policy action to a per-step target displacement, while ``pos_action_bounds`` [m]
+    clips the resulting position target relative to the fixed asset to bound the workspace.
+
+    The FORGE environments inherit this configuration but use a different action space (absolute targets
+    relative to the fixed asset), in which these parameters play different roles. See ``ForgeCtrlCfg``
+    in ``isaaclab_tasks/contrib/forge/forge_env_cfg.py``.
+    """
+
     ema_factor = 0.2
 
     pos_action_bounds = [0.05, 0.05, 0.05]

diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env.py
@@ -150,7 +150,23 @@ def _get_observations(self):
         return {"policy": obs_tensors, "critic": state_tensors}
 
     def _apply_action(self):
-        """FORGE actions are defined as targets relative to the fixed asset."""
+        """FORGE actions are defined as targets relative to the fixed asset.
+
+        Unlike Factory actions, which are interpreted as displacements relative to the current
+        end-effector pose, FORGE actions encode absolute pose targets relative to the fixed asset.
+        As a result, the control parameters play different roles in the two environment families:
+
+        * ``pos_action_bounds`` and ``rot_action_bounds`` map the normalized policy action onto the
+          operational volume around the fixed asset.
+        * ``pos_action_threshold`` and ``rot_action_threshold`` clip the per-step motion of the target
+          relative to the current end-effector pose. They are applied through the per-environment
+          ``pos_threshold`` and ``rot_threshold`` tensors and correspond to the action scale (lambda)
+          in the FORGE paper, which is randomized per episode as part of the dynamics randomization
+          scheme and exposed to the critic as privileged state.
+
+        Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich
+        Manipulation under Uncertainty", Sec. III-B, Eq. 6. https://arxiv.org/abs/2408.04587
+        """
         if self.last_update_timestamp < self._robot._data._sim_timestamp:
             self._compute_intermediate_values(dt=self.physics_dt)
 

diff --git a/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/contrib/forge/forge_env_cfg.py
@@ -26,6 +26,25 @@
 
 @configclass
 class ForgeCtrlCfg(CtrlCfg):
+    """Controller and action-space configuration for the FORGE environments.
+
+    In the FORGE environments, policy actions encode absolute pose targets relative to the fixed
+    asset: ``pos_action_bounds`` [m] and ``rot_action_bounds`` [rad] map the normalized policy action
+    onto the operational volume around the fixed asset, while ``pos_action_threshold`` [m] and
+    ``rot_action_threshold`` [rad] clip the per-step motion of the target relative to the current
+    end-effector pose. This differs from the Factory environments, where actions are displacements
+    relative to the current end-effector pose and the roles are swapped (thresholds scale, bounds clip).
+
+    The per-step clips correspond to the action scale (lambda) in the FORGE paper (Sec. III-B, Eq. 6).
+    They are randomized per episode via ``pos_threshold_noise_level`` and ``rot_threshold_noise_level``
+    as part of the dynamics randomization scheme and are exposed to the critic as privileged state.
+    With the default values, the randomized position action scale spans [1.6, 2.5] cm, matching the
+    randomization parameters in Appendix A of the paper.
+
+    Reference: Noseworthy et al., "FORGE: Force-Guided Exploration for Robust Contact-Rich
+    Manipulation under Uncertainty". https://arxiv.org/abs/2408.04587
+    """
+
     ema_factor_range = [0.025, 0.1]
     default_task_prop_gains = [565.0, 565.0, 565.0, 28.0, 28.0, 28.0]
     task_prop_gains_noise_level = [0.41, 0.41, 0.41, 0.41, 0.41, 0.41]