pytorch · MartinPavella · Jun 15, 2026
@@ -109,6 +109,43 @@ def node_is_effectively_static_tensor(
     )
 
 
+def weights_are_effectively_static(
+    node: Node, parameters_mapping: dict[str, Parameter], weight_index: int = 1
+) -> bool:
+    """Check whether the weights consumed by `node` are effectively static.
+
+    Neutron IR sometimes requires some weights to be static. Edge lowering sometimes inserts a `permute_copy` to
+    transpose the weights. During conversion to Neutron IR, that `permute_copy` is removed if it transposes static
+    data, which leaves the weights static. Weights reaching `node` through a
+    `permute_copy -> quantize -> dequantize` chain are therefore judged by the input of the `permute_copy`. In
+    every other case, the weight argument itself must be effectively static.
+
+    :param node: Node whose weight argument is checked.
+    :param parameters_mapping: Dict mapping tensor names to their static data. Should be inferred from the
+                                `state_dict` attribute of an edge program.
+    :param weight_index: Index into `node.args` where the weight is located. Defaults to 1.
+    :return: True if the weight at the given index is effectively static.
+    """
+
+    def _produced_by_permute_copy(candidate: Node) -> bool:
+        # The `hasattr` guard lets arguments without a `target` attribute fail the check instead of raising.
+        return hasattr(candidate, "target") and candidate.target == PermuteCopy
+
+    weights = node.args[weight_index]
+
+    # Detect the `permute_copy -> quantize -> dequantize` chain in front of the weight input.
+    if (
+        _is_dequantize(weights)
+        and _is_quantize(quantize_node := weights.args[0])
+        and _produced_by_permute_copy(transpose_node := quantize_node.args[0])
+    ):
+        # The weights are produced by a `permute_copy`. Its input (the weights) must be static.
+        weights = transpose_node.args[0]
+
+    # If there is no `permute_copy`, `weights` is still the argument of `node`, which must be static directly.
+    return node_is_effectively_static_tensor(weights, parameters_mapping)
+
+
 def try_get_tensor_constant_from_node(
     graph_module: GraphModule, node: Node
 ) -> Parameter | None:

@@ -1,9 +1,15 @@
-# Copyright 2024-2025 NXP
+# Copyright 2024-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.backends.nxp.backend.edge_helper import input_rank
+import torch
+
+from executorch.backends.nxp.backend.edge_helper import (
+    input_rank,
+    node_is_effectively_static_tensor,
+    weights_are_effectively_static,
+)
 from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
@@ -12,10 +18,18 @@
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
     fully_connected_options,
 )
+
+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
 from torch.nn import Parameter
 
 
+# Operand positions in `node.args`. Edge signature: aten.addmm(bias, input, weight, *, beta=1, alpha=1)
+BIAS_IDX = 0
+MAIN_INPUT_IDX = 1
+WEIGHT_IDX = 2
+
+
 class AddMMConverter(NodeConverter):
     """Convert the `aten.addmm` operator to TFLite `FullyConnected` with a bias input."""
 
@@ -29,12 +43,67 @@ def _is_supported_in_IR(
             return False
 
         # The weights must be 2D.
-        if input_rank(node, 2) != 2:
+        if input_rank(node, WEIGHT_IDX) != 2:
+            return False
+
+        alpha, beta = node.kwargs.get("alpha", 1), node.kwargs.get("beta", 1)
+        if alpha != 1 or beta != 1:
+            # As these cases seem rare, conversion is not implemented for the time being.
+            return False
+
+        return True
+
+    @staticmethod
+    def _is_supported_on_target(
+        node: Node,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        """Check the quantization types and the static data requirements of the `aten.addmm` `node`.
+
+        :param node: The `aten.addmm` node, whose args are (bias, main input, weights).
+        :param neutron_target_spec: Not used by this check.
+        :param parameters_mapping: Mapping passed on to the helpers which decide if a tensor is static.
+        :param custom_delegation_options: Not used by this check.
+        :return: True only if every check below passes.
+        """
+        uses_type = NodeConverter.uses_quantization_type_for_io
+        # Main input and output must be `int8` or `uint8`, weights `int8` and bias `int32`.
+        quantization_ok = (
+            uses_type(node, [torch.int8, torch.uint8], [MAIN_INPUT_IDX], [0])
+            and uses_type(node, [torch.int8], [WEIGHT_IDX], [])
+            and uses_type(node, [torch.int32], [BIAS_IDX], [])
+        )
+        if not quantization_ok:
+            return False
+
+        # Weights must be constant.
+        weights_static = weights_are_effectively_static(
+            node, parameters_mapping, weight_index=WEIGHT_IDX
+        )
+        if not weights_static:
+            return False
+
+        # The bias must be constant.
+        bias = node.args[BIAS_IDX]
+        if not node_is_effectively_static_tensor(bias, parameters_mapping):
             return False
 
         return True
 
     def convert(self, node: Node):
+        """Convert the `aten.addmm` operator to Neutron IR `FullyConnected`.
+        The schema is:
+            addmm(
+                Tensor self,
+                Tensor mat1,
+                Tensor mat2,
+                *,
+                Scalar beta=1,
+                Scalar alpha=1
+            ) -> Tensor
+        """
         self.assert_convertible(node)
 
         t_op = self._create_tflite_op_with_io_tensors(node)
@@ -47,14 +116,14 @@ def convert(self, node: Node):
         w = t_op.tmp_inputs[2]
         y = t_op.tmp_outputs[0]
 
-        # Assign the operator its TFLite inputs and outputs
+        # Assign the operator its Neutron IR inputs and outputs
         t_op.tmp_inputs = [x, w, bias]
         t_op.tmp_outputs = [y]
 
         ops = OpsList(middle_op=t_op)
 
         # The `aten.addmm` uses main input with shape [M, N] and the weights have the shape [N, O].
-        # TFLite `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
+        # Neutron IR `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
         # Insert a `Transpose` operator to permute the weights to achieve correct conversion. (The `Transpose` will not
         #  be present in the output model if the weights are static.)
         ops.add_pre(self.builder.create_transpose_operator_before(t_op, 1, [1, 0]))

@@ -1,9 +1,14 @@
-# Copyright 2024-2025 NXP
+# Copyright 2024-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.backends.nxp.backend.edge_helper import input_rank
+import torch
+
+from executorch.backends.nxp.backend.edge_helper import (
+    input_rank,
+    weights_are_effectively_static,
+)
 from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
@@ -12,6 +17,7 @@
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
     fully_connected_options,
 )
+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
 from torch.nn import Parameter
 
@@ -33,8 +39,37 @@ def _is_supported_in_IR(
 
         return True
 
+    @staticmethod
+    def _is_supported_on_target(
+        node: Node,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        """Check the quantization types and the static weights requirement of the `aten.mm` `node`.
+
+        The target spec and the custom delegation options are not used by this check.
+        """
+        uses_type = NodeConverter.uses_quantization_type_for_io
+
+        # Main input (index 0) and output must be `int8` or `uint8`. Weights (index 1) must be `int8`.
+        quantization_ok = uses_type(
+            node, [torch.int8, torch.uint8], [0], [0]
+        ) and uses_type(node, [torch.int8], [1], [])
+        if not quantization_ok:
+            return False
+
+        # Weights must be static.
+        return bool(weights_are_effectively_static(node, parameters_mapping))
+
     def convert(self, node: Node):
-        """Convert the `aten.mm` operator to TFLite `FullyConnected` without a bias input."""
+        """Convert the `aten.mm` operator to Neutron IR `FullyConnected` without a bias input.
+        The schema is:
+            mm(
+                Tensor self,
+                Tensor mat2
+            ) -> Tensor
+        """
         self.assert_convertible(node)
 
         t_op = self._create_tflite_op_with_io_tensors(node)
@@ -44,14 +79,14 @@ def convert(self, node: Node):
         w = t_op.tmp_inputs[1]
         y = t_op.tmp_outputs[0]
 
-        # Assign the operator its TFLite inputs and outputs
+        # Assign the operator its Neutron IR inputs and outputs
         t_op.tmp_inputs = [x, w]
         t_op.tmp_outputs = [y]
 
         ops = OpsList(middle_op=t_op)
 
         # The `aten.mm` uses main input with shape [M, N] and the weights have the shape [N, O].
-        # TFLite `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
+        # Neutron IR `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
         # Insert a `Transpose` operator to permute the weights to achieve correct conversion. (The `Transpose` will not
         #  be present in the output model if the weights are static.)
         ops.add_pre(self.builder.create_transpose_operator_before(t_op, 1, [1, 0]))

@@ -9,6 +9,7 @@
 
 from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass
 from executorch.backends.nxp.neutron_partitioner import QDQClusterRecognizer
+from executorch.backends.nxp.tests.ops_aliases import PermuteCopy
 
 # noinspection PyProtectedMember
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -109,9 +110,11 @@ class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
     main_cluster_node_to_auxiliary_nodes = {
         AddMM: [
             ViewCopy,
+            PermuteCopy,
         ],
         MM: [
             ViewCopy,
+            PermuteCopy,
         ],
         ViewCopy: [Clone, CloneDimOrder],
         Conv: [