Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions backends/nxp/backend/edge_program_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,28 @@ class EdgeProgramToIRConverter:
_default_target_spec = NeutronTargetSpec("imxrt700")
_default_delegation_options = CustomDelegationOptions()

def __init__(self):
    # Maps edge-program node debug handles to the indices of the TFLite
    # operators they were converted into. Populated by `convert_program`
    # after optimization; empty until a conversion has run.
    self.edge_to_tflite_map: dict[int, tuple[int, ...]] = {}
def convert_program(
self,
edge_program: ExportedProgram,
conversion_config: ConversionConfig = _default_conversion_config,
neutron_target_spec: NeutronTargetSpec = _default_target_spec,
custom_delegation_options: CustomDelegationOptions = _default_delegation_options,
) -> tuple[bytes, dict[str, DataFormat]]:
) -> tuple[bytes, dict[str, DataFormat], dict[int, tuple[int, ...]]]:
"""
Convert ExportedProgram in Edge dialect to IR (TFLite flatbuffers) as bytes.

:param edge_program: Converter ExportedProgram.
:param conversion_config: ConversionConfig instance.
:param neutron_target_spec: Object for querying the target platform to retrieve its properties.
:param custom_delegation_options: Custom user options which affect node delegation.
:return: TFLite flatbuffers as bytes.
:return: TFLite flatbuffers as bytes, I/O formats, and edge-to-tflite mapping.
"""
# Reset the edge to tflite map for each conversion
self.edge_to_tflite_map = {}

parameters_mapping = self.map_inputs_to_parameters(edge_program)
dim_order_map = self.map_nodes_to_dim_order(edge_program)

Expand All @@ -110,14 +116,17 @@ def convert_program(
# Apply optimizations and finalize the model.
internal_tflite_model = cc.tflite_builder.finish()

# Get the final edge to tflite mapping after optimization
self.edge_to_tflite_map = cc.tflite_builder.edge_to_tflite_map

# Extract the formats of the model's inputs and outputs.
io_formats = cc.tflite_builder.get_io_formats(edge_program.graph_signature)

# TFLite model generation
flatbuffers_builder = flatbuffers.Builder()
internal_tflite_model.gen_tflite(flatbuffers_builder)

return bytes(flatbuffers_builder.Output()), io_formats
return bytes(flatbuffers_builder.Output()), io_formats, self.edge_to_tflite_map

@staticmethod
def append_placeholders_and_tensors(nodes: list[Node], context: ConversionContext):
Expand Down Expand Up @@ -159,7 +168,6 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
]

for node in nodes:
if node.op == "call_function":
if node.target in qdq_related_functions and "cluster" in node.meta:
Expand All @@ -171,7 +179,22 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
# The node was already processed alongside the Q/DQ ops.
pass
elif node.target in functions_converters:
# Get TFLite op count BEFORE conversion
tflite_op_count_before = len(conversion_context.tflite_builder.get_operators().vector)
# Convert the node
functions_converters[node.target](conversion_context).convert(node)
# Get TFLite op count AFTER conversion
tflite_op_count_after = len(conversion_context.tflite_builder.get_operators().vector)

# Track the mapping - store edge debug handle in operators
edge_debug_handle = node.meta.get("debug_handle", None)
if edge_debug_handle is not None and tflite_op_count_after > tflite_op_count_before:
operators = conversion_context.tflite_builder.get_operators().vector
for i in range(tflite_op_count_before, tflite_op_count_after):
# Store edge debug handle in operator's temporary attribute
operators[i].tmp_edge_debug_handle = edge_debug_handle
logger.i(f"Tagged TFLite ops {list(range(tflite_op_count_before, tflite_op_count_after))} with edge debug_handle={edge_debug_handle} for node '{node.name}'")

else:
logger.e(
logger.Code.NOT_IMPLEMENTED,
Expand Down
24 changes: 24 additions & 0 deletions backends/nxp/backend/ir/converter/builder/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class ModelBuilder:

conversion_config: ConversionConfig

edge_to_tflite_map: dict[int, tuple[int, ...]] # Mapping edge debug handles to tuple of TFLite operator indices

_default_conversion_config = ConversionConfig()

def __init__(
Expand All @@ -105,6 +107,7 @@ def __init__(
self._nchw_tensor_version = {}
self._skipped_output_map = {}
self._zeros_tensor_map = {}
self.edge_to_tflite_map = {}

def create_zeros_tensor(
self, dims: List[int], name: str, dtype: np.dtype, can_reuse: bool = False
Expand Down Expand Up @@ -503,6 +506,9 @@ def finish(self) -> tflite_model.Model:
self.conversion_config.optimization_blacklist,
)

# Create the final edge-to-tflite mapping after model optimization
self._create_edge_to_tflite_mapping()

self._keep_one_empty_buffer()

# Remove outputs, which are not produced by any node. Otherwise, there would be errors after inference.
Expand All @@ -524,6 +530,24 @@ def finish(self) -> tflite_model.Model:

return self._tfl_model

def _create_edge_to_tflite_mapping(self):
    """Build the edge-to-TFLite operator mapping and store it in `edge_to_tflite_map`.

    Scans the final operator vector and groups operator indices by the
    `tmp_edge_debug_handle` tag attached during node conversion. Must be
    called only after all model optimizations have been applied, so the
    recorded indices match the output TFLite model.
    """
    edge_to_tflite: dict[int, list[int]] = {}
    for idx, op in enumerate(self.get_operators().vector):
        # Operators inserted by optimization passes may lack the tag entirely,
        # so read it defensively with a single `getattr` instead of
        # `hasattr` followed by a second attribute access.
        debug_handle = getattr(op, "tmp_edge_debug_handle", None)
        if debug_handle is not None:
            edge_to_tflite.setdefault(debug_handle, []).append(idx)

    # Freeze the index lists into tuples for the published mapping.
    self.edge_to_tflite_map = {k: tuple(v) for k, v in edge_to_tflite.items()}
    logger.i(f"\nFinal edge_to_tflite_map after optimization: {self.edge_to_tflite_map}")

def _assign_io_tensor_indices(self, inputs, outputs, allow_inputs_stripping: bool):
for tensor in outputs.tmp_outputs:
try:
Expand Down
5 changes: 5 additions & 0 deletions backends/nxp/backend/ir/tflite_generator/tflite_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@ class Operator(meta.TFLiteObject):
# If `True`, this is an extra operator added during conversion. It was not present in the original input model.
tmp_added_extra: bool

# Edge program debug handle for mapping edge nodes to TFLite operators
tmp_edge_debug_handle: Optional[int]

def __init__(
self,
inputs: OperatorInputs = None,
Expand Down Expand Up @@ -541,6 +544,8 @@ def __init__(
self.tmp_version = 1
self.tmp_added_extra = False

self.tmp_edge_debug_handle = None

def uses_per_channel_quantization(self) -> bool:
"""Determine if this operator uses per-channel quantization."""
for tensor in itertools.chain(self.tmp_inputs, self.tmp_outputs):
Expand Down
9 changes: 9 additions & 0 deletions backends/nxp/backend/neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def convert(
delegation_tag: str,
fetch_constants_to_sram: bool = False,
use_new_flow_neutron_c: bool = False,
use_profiling: bool = False,
) -> bytes:
"""
Call Neutron Converter.
Expand All @@ -77,6 +78,7 @@ def convert(
:param delegation_tag: The delegation tag of model partition.
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
:param use_profiling: Enable profiling for neutron delegated model.
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).

:return: TFLite model with Neutron microcode as bytes.
Expand All @@ -95,6 +97,13 @@ def convert(
if hasattr(cctx.compilationOpts, "useNewFlowNeutronC"):
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c

if use_profiling:
cctx.compilationOpts.useProfiling = use_profiling
cctx.compilationOpts.dumpAfterImport = "console"
cctx.compilationOpts.dumpAfterGenerate = "console"
cctx.compilationOpts.verbose = True
#cctx.compilationOpts.dumpGraphs = 1

# Try to use multiprocessing for isolation, but fall back to direct execution
# if the environment doesn't support it (e.g., in sandcastle/build environments)
try:
Expand Down
Loading
Loading