Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions backends/nxp/backend/edge_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,43 @@ def node_is_effectively_static_tensor(
)


def weights_are_effectively_static(
    node: Node, parameters_mapping: dict[str, Parameter], weight_index: int = 1
) -> bool:
    """Check whether the weights consumed by `node` are effectively static.

    Neutron IR sometimes requires some weights to be static. Edge lowering may insert a `permute_copy` to
    transpose the weights; when that `permute_copy` transposes static data it is removed during conversion to
    Neutron IR, so the weights end up static anyway. For that reason a weight reached through the chain
    `permute_copy -> quantize -> dequantize` is judged by the input of the `permute_copy` instead of by the
    dequantize node itself.

    :param node: Node whose weight operand is inspected.
    :param parameters_mapping: Dict mapping tensor names to their static data. Should be inferred from the
                                `state_dict` attribute of an edge program.
    :param weight_index: Index into `node.args` where the weight is located. Defaults to 1.
    :return: True if the weight at the given index is effectively static.
    """
    # By default the weight operand itself is the node that must be static.
    node_to_check = node.args[weight_index]

    # Walk backwards through `dequantize <- quantize <- permute_copy`. Each step is only taken if the
    # previous one matched, so `.args` is never read from a node that failed its check.
    if _is_dequantize(node_to_check):
        quantize_candidate = node_to_check.args[0]
        if _is_quantize(quantize_candidate):
            permute_candidate = quantize_candidate.args[0]
            produced_by_permute_copy = (
                hasattr(permute_candidate, "target")
                and permute_candidate.target == PermuteCopy
            )
            if produced_by_permute_copy:
                # The weights are produced by a `permute_copy`, so its input must be the static one.
                node_to_check = permute_candidate.args[0]

    return node_is_effectively_static_tensor(node_to_check, parameters_mapping)


def try_get_tensor_constant_from_node(
graph_module: GraphModule, node: Node
) -> Parameter | None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
# Copyright 2024-2025 NXP
# Copyright 2024-2026 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.backends.nxp.backend.edge_helper import input_rank
import torch

from executorch.backends.nxp.backend.edge_helper import (
input_rank,
node_is_effectively_static_tensor,
weights_are_effectively_static,
)
from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
from executorch.backends.nxp.backend.ir.converter.node_converter import (
CustomDelegationOptions,
Expand All @@ -12,10 +18,18 @@
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
fully_connected_options,
)

from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from torch.fx import Node
from torch.nn import Parameter


# Positions of the operands in `node.args`, following the edge operator signature:
#   aten.addmm(bias, input, weight, *, beta=1, alpha=1)
BIAS_IDX = 0
MAIN_INPUT_IDX = 1
WEIGHT_IDX = 2


class AddMMConverter(NodeConverter):
"""Convert the `aten.addmm` operator to TFLite `FullyConnected` with a bias input."""

Expand All @@ -29,12 +43,67 @@ def _is_supported_in_IR(
return False

# The weights must be 2D.
if input_rank(node, 2) != 2:
if input_rank(node, WEIGHT_IDX) != 2:
return False

alpha, beta = node.kwargs.get("alpha", 1), node.kwargs.get("beta", 1)
if alpha != 1 or beta != 1:
# As these cases seem rare, conversion is not implemented for the time being.
return False

return True

@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether this `aten.addmm` node satisfies the target's requirements.

    The node is accepted only if every one of the following holds (checked in this order):
      1. the main input and the output are quantized as `int8` or `uint8`,
      2. the weights are quantized as `int8`,
      3. the bias is quantized as `int32`,
      4. the weights are effectively static,
      5. the bias is effectively static.

    :param node: The `aten.addmm` node to check.
    :param neutron_target_spec: Not used by the checks in this method.
    :param parameters_mapping: Dict mapping tensor names to their static data.
    :param custom_delegation_options: Not used by the checks in this method.
    :return: True if all requirements are met, False otherwise.
    """
    # Each entry is (allowed quantization types, input indices, output indices).
    io_type_requirements = (
        # Main input and output must be `int8` or `uint8`.
        ([torch.int8, torch.uint8], [MAIN_INPUT_IDX], [0]),
        # Weights must be `int8`.
        ([torch.int8], [WEIGHT_IDX], []),
        # Bias must be `int32`.
        ([torch.int32], [BIAS_IDX], []),
    )
    for allowed_types, input_indices, output_indices in io_type_requirements:
        if not NodeConverter.uses_quantization_type_for_io(
            node, allowed_types, input_indices, output_indices
        ):
            return False

    # Weights must be constant.
    weights_static = weights_are_effectively_static(
        node, parameters_mapping, weight_index=WEIGHT_IDX
    )
    if not weights_static:
        return False

    # The bias must be constant.
    bias_static = node_is_effectively_static_tensor(
        node.args[BIAS_IDX], parameters_mapping
    )
    if not bias_static:
        return False

    return True

def convert(self, node: Node):
"""Convert the `aten.addmm` operator to NeutronIR `FullyConnected`.
The schema is:
addmm(
Tensor self,
Tensor mat1,
Tensor mat2,
*,
Scalar beta=1,
Scalar alpha=1
) -> Tensor
"""
self.assert_convertible(node)

t_op = self._create_tflite_op_with_io_tensors(node)
Expand All @@ -47,14 +116,14 @@ def convert(self, node: Node):
w = t_op.tmp_inputs[2]
y = t_op.tmp_outputs[0]

# Assign the operator its TFLite inputs and outputs
# Assign the operator its Neutron IR inputs and outputs
t_op.tmp_inputs = [x, w, bias]
t_op.tmp_outputs = [y]

ops = OpsList(middle_op=t_op)

# The `aten.addmm` uses main input with shape [M, N] and the weights have the shape [N, O].
# TFLite `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
# Neutron IR `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
# Insert a `Transpose` operator to permute the weights to achieve correct conversion. (The `Transpose` will not
# be present in the output model if the weights are static.)
ops.add_pre(self.builder.create_transpose_operator_before(t_op, 1, [1, 0]))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# Copyright 2024-2025 NXP
# Copyright 2024-2026 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.backends.nxp.backend.edge_helper import input_rank
import torch

from executorch.backends.nxp.backend.edge_helper import (
input_rank,
weights_are_effectively_static,
)
from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
from executorch.backends.nxp.backend.ir.converter.node_converter import (
CustomDelegationOptions,
Expand All @@ -12,6 +17,7 @@
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
fully_connected_options,
)
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from torch.fx import Node
from torch.nn import Parameter

Expand All @@ -33,8 +39,37 @@ def _is_supported_in_IR(

return True

@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether this `aten.mm` node satisfies the target's requirements.

    The node is accepted only if the main input and the output are quantized as `int8` or `uint8`, the
    weights are quantized as `int8`, and the weights are effectively static.

    :param node: The `aten.mm` node to check.
    :param neutron_target_spec: Not used by the checks in this method.
    :param parameters_mapping: Dict mapping tensor names to their static data.
    :param custom_delegation_options: Not used by the checks in this method.
    :return: True if all requirements are met, False otherwise.
    """
    # Operand positions in `node.args`: the main input comes first, the weights second.
    main_input_idx = 0
    weight_idx = 1

    # Main input and output must be `int8` or `uint8`.
    io_types_ok = NodeConverter.uses_quantization_type_for_io(
        node, [torch.int8, torch.uint8], [main_input_idx], [0]
    )
    if not io_types_ok:
        return False

    # Weights must be `int8`.
    weight_type_ok = NodeConverter.uses_quantization_type_for_io(
        node, [torch.int8], [weight_idx], []
    )
    if not weight_type_ok:
        return False

    # Weights must be static.
    if not weights_are_effectively_static(node, parameters_mapping):
        return False

    return True

def convert(self, node: Node):
"""Convert the `aten.mm` operator to TFLite `FullyConnected` without a bias input."""
"""Convert the `aten.mm` operator to Neutron IR `FullyConnected` without a bias input.
The schema is:
mm(
Tensor self,
Tensor mat2
) -> Tensor
"""
self.assert_convertible(node)

t_op = self._create_tflite_op_with_io_tensors(node)
Expand All @@ -44,14 +79,14 @@ def convert(self, node: Node):
w = t_op.tmp_inputs[1]
y = t_op.tmp_outputs[0]

# Assign the operator its TFLite inputs and outputs
# Assign the operator its Neutron IR inputs and outputs
t_op.tmp_inputs = [x, w]
t_op.tmp_outputs = [y]

ops = OpsList(middle_op=t_op)

# The `aten.mm` uses main input with shape [M, N] and the weights have the shape [N, O].
# TFLite `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
# Neutron IR `FullyConnected` requires the weights to have shape [O, N] (if the main input has shape [M, N]).
# Insert a `Transpose` operator to permute the weights to achieve correct conversion. (The `Transpose` will not
# be present in the output model if the weights are static.)
ops.add_pre(self.builder.create_transpose_operator_before(t_op, 1, [1, 0]))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass
from executorch.backends.nxp.neutron_partitioner import QDQClusterRecognizer
from executorch.backends.nxp.tests.ops_aliases import PermuteCopy

# noinspection PyProtectedMember
from executorch.exir.dialects._ops import ops as exir_ops
Expand Down Expand Up @@ -109,9 +110,11 @@ class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
main_cluster_node_to_auxiliary_nodes = {
AddMM: [
ViewCopy,
PermuteCopy,
],
MM: [
ViewCopy,
PermuteCopy,
],
ViewCopy: [Clone, CloneDimOrder],
Conv: [
Expand Down
Loading
Loading