pytorch · roman-janik-nxp · Jun 2, 2026 · Jun 4, 2026 · robert-kalmar · Jun 15, 2026
diff --git a/.gitignore b/.gitignore
@@ -26,6 +26,7 @@ arm-scratch/
 executorch.egg-info
 pip-out/
 build-profiling/
+**/ddr_*_temp
 
 # Any exported models and profiling outputs
 *.bin

@@ -436,7 +436,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
 
         graph_module.recompile()
 
-        operators_not_to_delegate = self.delegation_spec[1][3].value.decode().split(",")
+        operators_not_to_delegate = self.delegation_spec[1][4].value.decode().split(",")
         logging.info(f"Operators not to delegate: {operators_not_to_delegate}")
 
         parameters_mapping = EdgeProgramToIRConverter.map_inputs_to_parameters(

@@ -9,8 +9,9 @@
 #
 
 import logging
+import os
 import struct
-from typing import final, List, Optional
+from typing import final
 
 import numpy as np
 import torch
@@ -45,10 +46,11 @@ class NeutronCompileSpecBuilder:
     config: NeutronTargetSpec
 
     def __init__(self):
-        self.compile_spec: List[CompileSpec] = []
+        self.compile_spec: list[CompileSpec] = []
         self.compiler_flags = []
         self.output_format = None
-        self.operators_not_to_delegate: List[str] = []
+        self.test_dir = None
+        self.operators_not_to_delegate: list[str] = []
         self.use_neutron_for_format_conversion = True
         self.fetch_constants_to_sram = False
         self.dump_kernel_selection_code = False
@@ -62,15 +64,17 @@ def _replace_colons(self, operator: str) -> str:
     def neutron_compile_spec(
         self,
         config: str,
-        extra_flags: Optional[str] = None,
-        operators_not_to_delegate: Optional[List[str]] = None,
+        test_dir: str | None = None,
+        extra_flags: str | None = None,
+        operators_not_to_delegate: list[str] | None = None,
         use_neutron_for_format_conversion: bool = True,
         fetch_constants_to_sram: bool = False,
         dump_kernel_selection_code: bool = False,
     ) -> "NeutronCompileSpecBuilder":
         """Generate compile spec for Neutron NPU
 
         :param config: Neutron accelerator configuration, e.g. "imxrt700"
+        :param test_dir: Test directory to store test related files.
         :param extra_flags: Extra flags for the Neutron compiler
         :param operators_not_to_delegate: List of operators that should not be delegated
         :param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
@@ -83,6 +87,7 @@ def neutron_compile_spec(
         """
 
         self.config = NeutronTargetSpec(config)
+        self.test_dir = test_dir if test_dir is not None else os.getcwd()
 
         assert (
             self.output_format is None
@@ -113,6 +118,7 @@ def build(self):
                 CompileSpec("output_format", "tflite".encode()),
                 CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()),
                 CompileSpec("target", self.config.get_name().encode()),
+                CompileSpec("test_dir", f"{self.test_dir}".encode()),
                 CompileSpec(
                     "operators_not_to_delegate",
                     ",".join(self.operators_not_to_delegate).encode(),
@@ -136,17 +142,19 @@ def build(self):
 
 def generate_neutron_compile_spec(
     config: str,  # The target platform. For example "imxrt700".
-    system_config: Optional[str] = None,
-    extra_flags: Optional[str] = None,
-    operators_not_to_delegate: Optional[List[str]] = None,
+    system_config: str | None = None,
+    extra_flags: str | None = None,
+    test_dir: str | None = None,
+    operators_not_to_delegate: list[str] | None = None,
     use_neutron_for_format_conversion: bool = True,
     fetch_constants_to_sram: bool = False,
     dump_kernel_selection_code: bool = False,
-) -> List[CompileSpec]:
+) -> list[CompileSpec]:
     return (
         NeutronCompileSpecBuilder()
         .neutron_compile_spec(
             config,
+            test_dir=test_dir,
             extra_flags=extra_flags,
             operators_not_to_delegate=operators_not_to_delegate,
             use_neutron_for_format_conversion=use_neutron_for_format_conversion,
@@ -163,7 +171,7 @@ class NeutronBackend(BackendDetails):
     @staticmethod
     def preprocess(  # noqa C901
         edge_program: ExportedProgram,
-        compile_spec: List[CompileSpec],
+        compile_spec: list[CompileSpec],
     ) -> PreprocessResult:
         logging.info("NeutronBackend::preprocess")
 
@@ -173,6 +181,7 @@ def preprocess(  # noqa C901
         compile_flags = []
         binary = bytes()
         target = ""
+        test_dir = ""
         use_neutron_for_format_conversion = None
         fetch_constants_to_sram = False
         dump_kernel_selection_code = None
@@ -181,6 +190,8 @@ def preprocess(  # noqa C901
                 output_format = spec.value.decode()
             if spec.key == "target":
                 target = spec.value.decode()
+            if spec.key == "test_dir":
+                test_dir = spec.value.decode()
             if spec.key == "compile_flags":
                 compile_flags.append(spec.value.decode())
             if spec.key == "use_neutron_for_format_conversion":
@@ -230,14 +241,16 @@ def preprocess(  # noqa C901
 
             # Dump the tflite file if logging level is enabled
             if logging.root.isEnabledFor(logging.DEBUG):
-                import os
-
                 logging.debug(
-                    f"Serializing converted graph with tag {delegation_tag} to {os.getcwd()}"
+                    f"Serializing converted graph with tag {delegation_tag} to {test_dir}"
                 )
-                with open(f"{delegation_tag}_pure.et.tflite", "wb") as f:
+                with open(
+                    os.path.join(test_dir, f"{delegation_tag}_pure.et.tflite"), "wb"
+                ) as f:
                     f.write(bytes(tflite_model))
-                with open(f"{delegation_tag}_neutron.et.tflite", "wb") as f:
+                with open(
+                    os.path.join(test_dir, f"{delegation_tag}_neutron.et.tflite"), "wb"
+                ) as f:
                     f.write(bytes(neutron_model))
 
             binary = PayloadComposer().get_binary_payload(io_formats, neutron_model)

@@ -180,6 +180,7 @@ def to_quantized_edge_program(
     operators_not_to_delegate: list[str] = None,
     get_calibration_inputs_fn: GetCalibrationInputsFn = get_random_calibration_inputs,
     target: str = "imxrt700",
+    test_dir: str | None = None,
     use_qat: bool = False,
     train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
     remove_quant_io_ops: bool = False,
@@ -217,6 +218,7 @@ def to_quantized_edge_program(
     preserve_ops = [torch.ops.aten.prelu.default]
     compile_spec = generate_neutron_compile_spec(
         target,
+        test_dir=test_dir,
         operators_not_to_delegate=operators_not_to_delegate,
         use_neutron_for_format_conversion=use_neutron_for_format_conversion,
         fetch_constants_to_sram=fetch_constants_to_sram,
@@ -266,6 +268,7 @@ def to_quantized_edge_program(
 def to_quantized_executorch_program(
     model: torch.nn.Module,
     input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]],
+    test_dir: str | None = None,
     use_qat: bool = False,
     train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
     use_neutron_for_format_conversion: bool = True,
@@ -287,6 +290,7 @@ def to_quantized_executorch_program(
     edge_program_manager = to_quantized_edge_program(
         model,
         input_spec,
+        test_dir=test_dir,
         use_qat=use_qat,
         train_fn=train_fn,
         use_neutron_for_format_conversion=use_neutron_for_format_conversion,

@@ -34,7 +34,7 @@ def cifar_test_files(tmp_path_factory):
 
 
 @pytest.mark.parametrize("channels_last", [False, True])
-def test_cifarnet(mocker, cifar_test_files, channels_last):
+def test_cifarnet(mocker, request, cifar_test_files, channels_last):
     model = (
         CifarNet(
             pth_file=os.path.join(
@@ -64,9 +64,10 @@ def test_cifarnet(mocker, cifar_test_files, channels_last):
     lower_run_compare(
         model,
         [input_spec],
+        BaseGraphVerifier(1, non_dlg_nodes),
+        request,
         dataset_creator=CopyDatasetCreator(cifar_test_files),
         output_comparator=comparator,
-        dlg_model_verifier=BaseGraphVerifier(1, non_dlg_nodes),
         mocker=mocker,
         # Run the channels last reference in PyTorch as the ExecuTorch CPU model contains incorrectly
         #  lowered channels last convolution weights, which cause incorrect inference results. The issue
@@ -79,7 +80,7 @@ def test_cifarnet(mocker, cifar_test_files, channels_last):
     )
 
 
-def test_cifarnet_qat(mocker, cifar_test_files):
+def test_cifarnet_qat(mocker, request, cifar_test_files):
     model = CifarNet().get_eager_model().eval()
 
     input_shape = (1, 3, 32, 32)
@@ -94,9 +95,10 @@ def test_cifarnet_qat(mocker, cifar_test_files):
     lower_run_compare(
         model,
         input_shape,
+        BaseGraphVerifier(1, non_dlg_nodes),
+        request,
         dataset_creator=CopyDatasetCreator(cifar_test_files),
         output_comparator=comparator,
-        dlg_model_verifier=BaseGraphVerifier(1, non_dlg_nodes),
         mocker=mocker,
         use_qat=True,
     )
@@ -208,7 +208,7 @@ class TestConvertDivToMul:
         ids=lambda is_scalar: "scalar" if is_scalar else "tensor",
     )
     def test__static__full_pipeline(
-        self, mocker, input_shape: tuple[int, ...], is_scalar: bool
+        self, mocker, request, input_shape: tuple[int, ...], is_scalar: bool
     ):
         if is_scalar:
             divisor = np.random.uniform(0.01, 15)
@@ -231,5 +231,6 @@ def test__static__full_pipeline(
             model,
             input_shape,
             graph_verifier,
+            request,
             dataset_creator,
         )
@@ -0,0 +1,155 @@
+# Copyright 2026 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+
+import numpy as np
+import pytest
+import torch
+
+from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec
+from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier
+from executorch.backends.nxp.tests.models import AddTensorModule, AvgPool2dModule
+from executorch.backends.nxp.tests.nsys_testing import (
+    get_test_name,
+    lower_run_compare,
+    OUTPUTS_DIR,
+)
+
+
+@pytest.fixture(autouse=True)  # autouse: runs without being requested by tests
+def reseed_model_per_test_run():
+    torch.manual_seed(23)  # fixed seed for torch's default RNG
+    np.random.seed(23)  # fixed seed for NumPy's global RNG
+
+
+def test_nsys_test_debug_results__single_input(caplog, request):
+    # Set log level to DEBUG to create debug results
+    caplog.set_level(logging.DEBUG)
+
+    input_shape = (2, 4, 6, 7)
+    model = AvgPool2dModule(False, 0)
+
+    graph_verifier = BaseGraphVerifier(1, [])
+
+    lower_run_compare(
+        model,
+        input_shape,
+        graph_verifier,
+        request,
+        remove_quant_io_ops=True,
+    )
+
+    test_name = get_test_name(request)
+    # Running by CI scripts adds prefix to the name
+    assert "test_nsys_test_debug_results__single_input" in test_name
+    assert os.path.isdir(os.path.join(OUTPUTS_DIR, test_name, "diff_cpu_npu_results"))
+    assert os.path.isfile(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml"))
+
+    # Check file contains key symbols
+    with open(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml")) as f:
+        content = f.read()
+    keys = [
+        "date_time",
+        "eiq_neutron_sdk_version",
+        "eiq_nsys_version",
+        "git_branch",
+        "git_commit",
+        "test_name",
+    ]
+    assert all(key in content for key in keys)
+    assert os.path.isfile(
+        os.path.join(OUTPUTS_DIR, test_name, "tag1_neutron.et.tflite")
+    )
+    assert os.path.isfile(os.path.join(OUTPUTS_DIR, test_name, "tag1_pure.et.tflite"))
+
+    # Check text tensor variants
+    assert os.path.isfile(
+        os.path.join(OUTPUTS_DIR, test_name, "dataset", "calibration", "0000.txt")
+    )
+    assert os.path.isfile(
+        os.path.join(OUTPUTS_DIR, test_name, "dataset_quant", "0000.txt")
+    )
+    assert os.path.isfile(
+        os.path.join(OUTPUTS_DIR, test_name, "results_cpu", "0000.bin", "0000.txt")
+    )
+    assert os.path.isfile(
+        os.path.join(OUTPUTS_DIR, test_name, "results_npu", "0000.bin", "0000.txt")
+    )
+    assert os.path.isfile(
+        os.path.join(
+            OUTPUTS_DIR, test_name, "diff_cpu_npu_results", "0000.bin", "0000.txt"
+        )
+    )
+    assert os.path.isfile(os.path.join(OUTPUTS_DIR, f"{test_name}.zip"))
+
+
+class TestNsysDebugResults:
+    def test_nsys_test_debug_results__multiple_input(self, caplog, request):
+        # Set log level to DEBUG to create debug results
+        caplog.set_level(logging.DEBUG)
+
+        input_shape = (1, 4, 7)
+        x_input_spec = ModelInputSpec(input_shape)
+        model = AddTensorModule()
+
+        graph_verifier = BaseGraphVerifier(1, [])
+
+        lower_run_compare(
+            model,
+            [x_input_spec, x_input_spec],
+            graph_verifier,
+            request,
+        )
+
+        test_name = get_test_name(request)
+        # Running by CI scripts adds prefix to the name
+        assert (
+            "TestNsysDebugResults__test_nsys_test_debug_results__multiple_input"
+            in test_name
+        )
+        assert os.path.isdir(
+            os.path.join(OUTPUTS_DIR, test_name, "diff_cpu_npu_results")
+        )
+        assert os.path.isfile(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml"))
+
+        # Check file contains key symbols
+        with open(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml")) as f:
+            content = f.read()
+        keys = [
+            "date_time",
+            "eiq_neutron_sdk_version",
+            "eiq_nsys_version",
+            "git_branch",
+            "git_commit",
+            "test_name",
+        ]
+        assert all(key in content for key in keys)
+        assert os.path.isfile(
+            os.path.join(OUTPUTS_DIR, test_name, "tag1_neutron.et.tflite")
+        )
+        assert os.path.isfile(
+            os.path.join(OUTPUTS_DIR, test_name, "tag1_pure.et.tflite")
+        )
+
+        # Check text tensor variants
+        assert os.path.isfile(
+            os.path.join(
+                OUTPUTS_DIR, test_name, "dataset", "calibration", "0000", "00.txt"
+            )
+        )
+        assert os.path.isfile(
+            os.path.join(OUTPUTS_DIR, test_name, "results_cpu", "0000", "0000.txt")
+        )
+        assert os.path.isfile(
+            os.path.join(OUTPUTS_DIR, test_name, "results_npu", "0000", "0000.txt")
+        )
+        assert os.path.isfile(
+            os.path.join(
+                OUTPUTS_DIR, test_name, "diff_cpu_npu_results", "0000", "0000.txt"
+            )
+        )
+        assert os.path.isfile(os.path.join(OUTPUTS_DIR, f"{test_name}.zip"))