diff --git a/.gitignore b/.gitignore index 02dcea02026..043edd31419 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ arm-scratch/ executorch.egg-info pip-out/ build-profiling/ +**/ddr_*_temp # Any exported models and profiling outputs *.bin diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py index d4262b3a9f6..b220a8be534 100644 --- a/backends/nxp/neutron_partitioner.py +++ b/backends/nxp/neutron_partitioner.py @@ -436,7 +436,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult: graph_module.recompile() - operators_not_to_delegate = self.delegation_spec[1][3].value.decode().split(",") + operators_not_to_delegate = self.delegation_spec[1][4].value.decode().split(",") logging.info(f"Operators not to delegate: {operators_not_to_delegate}") parameters_mapping = EdgeProgramToIRConverter.map_inputs_to_parameters( diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index f28eb34064c..ade8f46a307 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -9,8 +9,9 @@ # import logging +import os import struct -from typing import final, List, Optional +from typing import final import numpy as np import torch @@ -45,10 +46,11 @@ class NeutronCompileSpecBuilder: config: NeutronTargetSpec def __init__(self): - self.compile_spec: List[CompileSpec] = [] + self.compile_spec: list[CompileSpec] = [] self.compiler_flags = [] self.output_format = None - self.operators_not_to_delegate: List[str] = [] + self.test_dir = None + self.operators_not_to_delegate: list[str] = [] self.use_neutron_for_format_conversion = True self.fetch_constants_to_sram = False self.dump_kernel_selection_code = False @@ -62,8 +64,9 @@ def _replace_colons(self, operator: str) -> str: def neutron_compile_spec( self, config: str, - extra_flags: Optional[str] = None, - operators_not_to_delegate: Optional[List[str]] = None, + test_dir: str | None = None, + extra_flags: str | None = None, + operators_not_to_delegate: list[str] | None = None, use_neutron_for_format_conversion: bool = True, fetch_constants_to_sram: bool = False, dump_kernel_selection_code: bool = False, @@ -71,6 +74,7 @@ def neutron_compile_spec( """Generate compile spec for Neutron NPU :param config: Neutron accelerator configuration, e.g. "imxrt700" + :param test_dir: Test directory to store test related files. :param extra_flags: Extra flags for the Neutron compiler :param operators_not_to_delegate: List of operators that should not be delegated :param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to @@ -83,6 +87,7 @@ def neutron_compile_spec( """ self.config = NeutronTargetSpec(config) + self.test_dir = test_dir if test_dir is not None else os.getcwd() assert ( self.output_format is None @@ -113,6 +118,7 @@ def build(self): CompileSpec("output_format", "tflite".encode()), CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()), CompileSpec("target", self.config.get_name().encode()), + CompileSpec("test_dir", f"{self.test_dir}".encode()), CompileSpec( "operators_not_to_delegate", ",".join(self.operators_not_to_delegate).encode(), @@ -136,17 +142,19 @@ def build(self): def generate_neutron_compile_spec( config: str, # The target platform. For example "imxrt700". - system_config: Optional[str] = None, - extra_flags: Optional[str] = None, - operators_not_to_delegate: Optional[List[str]] = None, + system_config: str | None = None, + extra_flags: str | None = None, + test_dir: str | None = None, + operators_not_to_delegate: list[str] | None = None, use_neutron_for_format_conversion: bool = True, fetch_constants_to_sram: bool = False, dump_kernel_selection_code: bool = False, -) -> List[CompileSpec]: +) -> list[CompileSpec]: return ( NeutronCompileSpecBuilder() .neutron_compile_spec( config, + test_dir=test_dir, extra_flags=extra_flags, operators_not_to_delegate=operators_not_to_delegate, use_neutron_for_format_conversion=use_neutron_for_format_conversion, @@ -163,7 +171,7 @@ class NeutronBackend(BackendDetails): @staticmethod def preprocess( # noqa C901 edge_program: ExportedProgram, - compile_spec: List[CompileSpec], + compile_spec: list[CompileSpec], ) -> PreprocessResult: logging.info("NeutronBackend::preprocess") @@ -173,6 +181,7 @@ def preprocess( # noqa C901 compile_flags = [] binary = bytes() target = "" + test_dir = "" use_neutron_for_format_conversion = None fetch_constants_to_sram = False dump_kernel_selection_code = None @@ -181,6 +190,8 @@ def preprocess( # noqa C901 output_format = spec.value.decode() if spec.key == "target": target = spec.value.decode() + if spec.key == "test_dir": + test_dir = spec.value.decode() if spec.key == "compile_flags": compile_flags.append(spec.value.decode()) if spec.key == "use_neutron_for_format_conversion": @@ -230,14 +241,16 @@ def preprocess( # noqa C901 # Dump the tflite file if logging level is enabled if logging.root.isEnabledFor(logging.DEBUG): - import os - logging.debug( - f"Serializing converted graph with tag {delegation_tag} to {os.getcwd()}" + f"Serializing converted graph with tag {delegation_tag} to {test_dir}" ) - with open(f"{delegation_tag}_pure.et.tflite", "wb") as f: + with open( + os.path.join(test_dir, f"{delegation_tag}_pure.et.tflite"), "wb" + ) as f: f.write(bytes(tflite_model)) - with open(f"{delegation_tag}_neutron.et.tflite", "wb") as f: + with open( + os.path.join(test_dir, f"{delegation_tag}_neutron.et.tflite"), "wb" + ) as f: f.write(bytes(neutron_model)) binary = PayloadComposer().get_binary_payload(io_formats, neutron_model) diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 5cfcb37c8a8..80da5ec679c 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -180,6 +180,7 @@ def to_quantized_edge_program( operators_not_to_delegate: list[str] = None, get_calibration_inputs_fn: GetCalibrationInputsFn = get_random_calibration_inputs, target: str = "imxrt700", + test_dir: str | None = None, use_qat: bool = False, train_fn: Callable[[torch.fx.GraphModule], None] | None = None, remove_quant_io_ops: bool = False, @@ -217,6 +218,7 @@ def to_quantized_edge_program( preserve_ops = [torch.ops.aten.prelu.default] compile_spec = generate_neutron_compile_spec( target, + test_dir=test_dir, operators_not_to_delegate=operators_not_to_delegate, use_neutron_for_format_conversion=use_neutron_for_format_conversion, fetch_constants_to_sram=fetch_constants_to_sram, @@ -266,6 +268,7 @@ def to_quantized_edge_program( def to_quantized_executorch_program( model: torch.nn.Module, input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]], + test_dir: str | None = None, use_qat: bool = False, train_fn: Callable[[torch.fx.GraphModule], None] | None = None, use_neutron_for_format_conversion: bool = True, @@ -287,6 +290,7 @@ def to_quantized_executorch_program( edge_program_manager = to_quantized_edge_program( model, input_spec, + test_dir=test_dir, use_qat=use_qat, train_fn=train_fn, use_neutron_for_format_conversion=use_neutron_for_format_conversion, diff --git a/backends/nxp/tests/generic_tests/test_cifarnet.py b/backends/nxp/tests/generic_tests/test_cifarnet.py index 1d795c938fe..c874ba24e47 100644 --- a/backends/nxp/tests/generic_tests/test_cifarnet.py +++ b/backends/nxp/tests/generic_tests/test_cifarnet.py @@ -34,7 +34,7 @@ def cifar_test_files(tmp_path_factory): @pytest.mark.parametrize("channels_last", [False, True]) -def test_cifarnet(mocker, cifar_test_files, channels_last): +def test_cifarnet(mocker, request, cifar_test_files, channels_last): model = ( CifarNet( pth_file=os.path.join( @@ -64,9 +64,10 @@ def test_cifarnet(mocker, cifar_test_files, channels_last): lower_run_compare( model, [input_spec], + BaseGraphVerifier(1, non_dlg_nodes), + request, dataset_creator=CopyDatasetCreator(cifar_test_files), output_comparator=comparator, - dlg_model_verifier=BaseGraphVerifier(1, non_dlg_nodes), mocker=mocker, # Run the channels last reference in PyTorch as the ExecuTorch CPU model contains incorrectly # lowered channels last convolution weights, which cause incorrect inference results. The issue @@ -79,7 +80,7 @@ def test_cifarnet(mocker, cifar_test_files, channels_last): ) -def test_cifarnet_qat(mocker, cifar_test_files): +def test_cifarnet_qat(mocker, request, cifar_test_files): model = CifarNet().get_eager_model().eval() input_shape = (1, 3, 32, 32) @@ -94,9 +95,10 @@ def test_cifarnet_qat(mocker, cifar_test_files): lower_run_compare( model, input_shape, + BaseGraphVerifier(1, non_dlg_nodes), + request, dataset_creator=CopyDatasetCreator(cifar_test_files), output_comparator=comparator, - dlg_model_verifier=BaseGraphVerifier(1, non_dlg_nodes), mocker=mocker, use_qat=True, ) diff --git a/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py b/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py index fcd0aae2130..3415b79a39d 100644 --- a/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py +++ b/backends/nxp/tests/generic_tests/test_convert_div_to_mul.py @@ -208,7 +208,7 @@ class TestConvertDivToMul: ids=lambda is_scalar: "scalar" if is_scalar else "tensor", ) def test__static__full_pipeline( - self, mocker, input_shape: tuple[int, ...], is_scalar: bool + self, mocker, request, input_shape: tuple[int, ...], is_scalar: bool ): if is_scalar: divisor = np.random.uniform(0.01, 15) @@ -231,5 +231,6 @@ def test__static__full_pipeline( model, input_shape, graph_verifier, + request, dataset_creator, ) diff --git a/backends/nxp/tests/generic_tests/test_debug_results.py b/backends/nxp/tests/generic_tests/test_debug_results.py new file mode 100644 index 00000000000..ccf5c13b501 --- /dev/null +++ b/backends/nxp/tests/generic_tests/test_debug_results.py @@ -0,0 +1,155 @@ +# Copyright 2026 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import os + +import numpy as np +import pytest +import torch + +from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec +from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.models import AddTensorModule, AvgPool2dModule +from executorch.backends.nxp.tests.nsys_testing import ( + get_test_name, + lower_run_compare, + OUTPUTS_DIR, +) + + +@pytest.fixture(autouse=True) +def reseed_model_per_test_run(): + torch.manual_seed(23) + np.random.seed(23) + + +def test_nsys_test_debug_results__single_input(caplog, request): + # Set log level to DEBUG to create debug results + caplog.set_level(logging.DEBUG) + + input_shape = (2, 4, 6, 7) + model = AvgPool2dModule(False, 0) + + graph_verifier = BaseGraphVerifier(1, []) + + lower_run_compare( + model, + input_shape, + graph_verifier, + request, + remove_quant_io_ops=True, + ) + + test_name = get_test_name(request) + # Running by CI scripts adds prefix to the name + assert "test_nsys_test_debug_results__single_input" in test_name + assert os.path.isdir(os.path.join(OUTPUTS_DIR, test_name, "diff_cpu_npu_results")) + assert os.path.isfile(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml")) + + # Check file contains key symbols + with open(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml")) as f: + content = f.read() + keys = [ + "date_time", + "eiq_neutron_sdk_version", + "eiq_nsys_version", + "git_branch", + "git_commit", + "test_name", + ] + assert all(key in content for key in keys) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "tag1_neutron.et.tflite") + ) + assert os.path.isfile(os.path.join(OUTPUTS_DIR, test_name, "tag1_pure.et.tflite")) + + # Check text tensor variants + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "dataset", "calibration", "0000.txt") + ) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "dataset_quant", "0000.txt") + ) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "results_cpu", "0000.bin", "0000.txt") + ) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "results_npu", "0000.bin", "0000.txt") + ) + assert os.path.isfile( + os.path.join( + OUTPUTS_DIR, test_name, "diff_cpu_npu_results", "0000.bin", "0000.txt" + ) + ) + assert os.path.isfile(os.path.join(OUTPUTS_DIR, f"{test_name}.zip")) + + +class TestNsysDebugResults: + def test_nsys_test_debug_results__multiple_input(self, caplog, request): + # Set log level to DEBUG to create debug results + caplog.set_level(logging.DEBUG) + + input_shape = (1, 4, 7) + x_input_spec = ModelInputSpec(input_shape) + model = AddTensorModule() + + graph_verifier = BaseGraphVerifier(1, []) + + lower_run_compare( + model, + [x_input_spec, x_input_spec], + graph_verifier, + request, + ) + + test_name = get_test_name(request) + # Running by CI scripts adds prefix to the name + assert ( + "TestNsysDebugResults__test_nsys_test_debug_results__multiple_input" + in test_name + ) + assert os.path.isdir( + os.path.join(OUTPUTS_DIR, test_name, "diff_cpu_npu_results") + ) + assert os.path.isfile(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml")) + + # Check file contains key symbols + with open(os.path.join(OUTPUTS_DIR, test_name, "summary.yaml")) as f: + content = f.read() + keys = [ + "date_time", + "eiq_neutron_sdk_version", + "eiq_nsys_version", + "git_branch", + "git_commit", + "test_name", + ] + assert all(key in content for key in keys) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "tag1_neutron.et.tflite") + ) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "tag1_pure.et.tflite") + ) + + # Check text tensor variants + assert os.path.isfile( + os.path.join( + OUTPUTS_DIR, test_name, "dataset", "calibration", "0000", "00.txt" + ) + ) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "results_cpu", "0000", "0000.txt") + ) + assert os.path.isfile( + os.path.join(OUTPUTS_DIR, test_name, "results_npu", "0000", "0000.txt") + ) + assert os.path.isfile( + os.path.join( + OUTPUTS_DIR, test_name, "diff_cpu_npu_results", "0000", "0000.txt" + ) + ) + assert os.path.isfile(os.path.join(OUTPUTS_DIR, f"{test_name}.zip")) diff --git a/backends/nxp/tests/generic_tests/test_quantized_input_data.py b/backends/nxp/tests/generic_tests/test_quantized_input_data.py index 8b2f6823e8d..a9f9f3e47e6 100644 --- a/backends/nxp/tests/generic_tests/test_quantized_input_data.py +++ b/backends/nxp/tests/generic_tests/test_quantized_input_data.py @@ -17,7 +17,7 @@ from executorch.backends.nxp.tests.ops_aliases import AvgPool2D, MulTensor -def test__single_quantized_inputs(mocker): +def test__single_quantized_inputs(mocker, request): input_spec = ModelInputSpec((2, 4, 6, 7)) model = AvgPool2dModule(False, 0) graph_verifier = DetailedGraphVerifier( @@ -29,19 +29,19 @@ def test__single_quantized_inputs(mocker): model, [input_spec], graph_verifier, + request, remove_quant_io_ops=True, ) - assert ( - OUTPUTS_DIR / "test__single_quantized_inputs" / "dataset_quant" / "0000.bin" - ).exists() + test_name = nsys_testing.get_test_name(request) + assert (OUTPUTS_DIR / test_name / "dataset_quant" / "0000.bin").exists() # Check outputs are in quantized int8 format output_tensor_spec = output_tensor_spec_spy.spy_return assert output_tensor_spec[0].dtype == torch.int8 -def test__single_quantized_inputs_edge_python_reference(mocker): +def test__single_quantized_inputs_edge_python_reference(mocker, request): input_spec = ModelInputSpec((2, 4, 6, 7)) model = AvgPool2dModule(False, 0) graph_verifier = DetailedGraphVerifier( @@ -53,23 +53,20 @@ def test__single_quantized_inputs_edge_python_reference(mocker): model, [input_spec], graph_verifier, + request, reference_model=ReferenceModel.QUANTIZED_EDGE_PYTHON, remove_quant_io_ops=True, ) - assert ( - OUTPUTS_DIR - / "test__single_quantized_inputs_edge_python_reference" - / "dataset_quant" - / "0000.bin" - ).exists() + test_name = nsys_testing.get_test_name(request) + assert (OUTPUTS_DIR / test_name / "dataset_quant" / "0000.bin").exists() # Check outputs are in quantized int8 format output_tensor_spec = output_tensor_spec_spy.spy_return assert output_tensor_spec[0].dtype == torch.int8 -def test__multiple_quantized_inputs(mocker): +def test__multiple_quantized_inputs(mocker, request): x_input_spec = ModelInputSpec((1, 4, 8, 8)) model = MulTensorModule() graph_verifier = DetailedGraphVerifier( @@ -81,23 +78,19 @@ def test__multiple_quantized_inputs(mocker): model, [x_input_spec, x_input_spec], graph_verifier, + request, remove_quant_io_ops=True, ) - assert ( - OUTPUTS_DIR - / "test__multiple_quantized_inputs" - / "dataset_quant" - / "0000" - / "00.bin" - ).exists() + test_name = nsys_testing.get_test_name(request) + assert (OUTPUTS_DIR / test_name / "dataset_quant" / "0000" / "00.bin").exists() # Check outputs are in quantized int8 format output_tensor_spec = output_tensor_spec_spy.spy_return assert output_tensor_spec[0].dtype == torch.int8 -def test__multiple_quantized_inputs_edge_python_reference(mocker): +def test__multiple_quantized_inputs_edge_python_reference(mocker, request): x_input_spec = ModelInputSpec((1, 4, 8, 8)) model = MulTensorModule() graph_verifier = DetailedGraphVerifier( @@ -109,17 +102,13 @@ def test__multiple_quantized_inputs_edge_python_reference(mocker): model, [x_input_spec, x_input_spec], graph_verifier, + request, reference_model=ReferenceModel.QUANTIZED_EDGE_PYTHON, remove_quant_io_ops=True, ) - assert ( - OUTPUTS_DIR - / "test__multiple_quantized_inputs_edge_python_reference" - / "dataset_quant" - / "0000" - / "00.bin" - ).exists() + test_name = nsys_testing.get_test_name(request) + assert (OUTPUTS_DIR / test_name / "dataset_quant" / "0000" / "00.bin").exists() # Check outputs are in quantized int8 format output_tensor_spec = output_tensor_spec_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py index ebe782c5a98..d42ef4c6e7d 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py @@ -68,7 +68,7 @@ def _get_dataset_creator(): dataset = RandomDatasetCreator(low=low, high=high) return dataset - def test__basic_nsys_inference(self, mocker): + def test__basic_nsys_inference(self, mocker, request): input_shape = (2, 3, 6, 7) model = AbsModule() graph_verifier = DetailedGraphVerifier( @@ -80,10 +80,11 @@ def test__basic_nsys_inference(self, mocker): model, input_shape, graph_verifier, + request, dataset_creator, ) - def test__basic_nsys_inference__big(self, mocker): + def test__basic_nsys_inference__big(self, mocker, request): # some operators have delegation requirement that size must be < 4096 input_shape = (4097, 1) model = AbsModule() @@ -96,5 +97,6 @@ def test__basic_nsys_inference__big(self, mocker): model, input_shape, graph_verifier, + request, dataset_creator, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py index 8b8f2da8c4e..9646c04a3f2 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py @@ -44,7 +44,9 @@ class TestAdaptiveAvgPool2D: ), ], ) - def test__basic_nsys_inference(self, mocker, use_qat, input_shape, output_size): + def test__basic_nsys_inference( + self, mocker, request, use_qat, input_shape, output_size + ): model = AdaptiveAvgPool2dModule(output_size) graph_verifier = DetailedGraphVerifier( mocker, @@ -60,6 +62,7 @@ def test__basic_nsys_inference(self, mocker, use_qat, input_shape, output_size): model, input_shape, graph_verifier, + request, RandomDatasetCreator(low=-1, high=1), output_comparator=output_comparator, use_qat=use_qat, @@ -69,7 +72,7 @@ def test__basic_nsys_inference(self, mocker, use_qat, input_shape, output_size): strict=True, reason="Known Neutron bad compute issue. Will be fixed in Neutron SW 3.1.2.", ) - def test__know_neutron_issue(self, mocker): + def test__know_neutron_issue(self, mocker, request): input_shape = (2, 3, 10, 15) output_size = (5, 5) model = AdaptiveAvgPool2dModule(output_size) @@ -86,11 +89,12 @@ def test__know_neutron_issue(self, mocker): model, input_shape, graph_verifier, + request, RandomDatasetCreator(low=-1, high=1), output_comparator=output_comparator, ) - def test__kernel_size_and_stride_limit(self, mocker): + def test__kernel_size_and_stride_limit(self, mocker, request): input_shape = (1, 3, 4, 4096) # input_size = (1, 4096) output_size = ( 2, @@ -114,6 +118,7 @@ def test__kernel_size_and_stride_limit(self, mocker): model, input_shape, graph_verifier, + request, RandomDatasetCreator(low=-1, high=1), output_comparator=output_comparator, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py index 3ede2cfaadd..6ac96e41cd1 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py @@ -16,6 +16,9 @@ ) from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.model_output_comparator import ( + AllCloseOutputComparator, +) from executorch.backends.nxp.tests.models import AddTensorConvModule, AddTensorModule from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.ops_aliases import ( @@ -38,67 +41,49 @@ class TestAddTensor: [ pytest.param((1,), id="1D."), pytest.param((6, 5), id="2D."), + pytest.param((6, 82), id="2D alt."), pytest.param((1, 4, 7), id="3D."), + pytest.param((1, 68, 7), id="3D alt."), pytest.param((2, 4, 3, 15), id="4D."), - pytest.param( - (6, 82), - id="2D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - pytest.param( - (1, 68, 7), - id="3D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - pytest.param( - (1, 4, 9, 11, 4), - id="5D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), + pytest.param((1, 4, 9, 11, 4), id="5D."), ], ) - def test__basic_nsys_inference(self, x_input_shape, mocker): + def test__basic_nsys_inference(self, mocker, request, x_input_shape): x_input_spec = ModelInputSpec(x_input_shape) model = AddTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={} ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, [x_input_spec, x_input_spec], graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, ) - @pytest.mark.parametrize( - "x_input_shape", - [ - pytest.param((1,), id="1D."), - pytest.param((6, 5), id="2D."), - pytest.param((1, 4, 7), id="3D."), - pytest.param((2, 4, 3, 15), id="4D."), - pytest.param( - (1, 4, 9, 11, 4), - id="5D.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - ], - ) - def test__basic_nsys_inference_qat(self, x_input_shape, mocker): - x_input_spec = ModelInputSpec(x_input_shape) + def test__basic_nsys_inference_qat(self, mocker, request): + x_input_spec = ModelInputSpec((1, 4, 7)) model = AddTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={} ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, [x_input_spec, x_input_spec], graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, use_qat=True, ) @@ -108,6 +93,10 @@ def test__basic_nsys_inference_qat(self, x_input_shape, mocker): pytest.param( [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D." ), + pytest.param( + [ModelInputSpec((69, 73)), ModelInputSpec((1, 73))], + id="2 inputs 2D alt.", + ), pytest.param( [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))], id="2 inputs 3D.", @@ -115,25 +104,24 @@ def test__basic_nsys_inference_qat(self, x_input_shape, mocker): pytest.param( [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D + 2D." ), - pytest.param( - [ModelInputSpec((69, 73)), ModelInputSpec((1, 73))], - id="2 inputs 2D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), ], ) - def test__broadcast(self, input_spec, mocker): + def test__broadcast(self, mocker, request, input_spec): model = AddTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={} ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, input_spec, graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, ) @pytest.mark.parametrize( @@ -172,7 +160,7 @@ def test__broadcast_unsupported(self, input_spec): ), ], ) - def test__w_conv(self, x_input_shape, mocker): + def test__w_conv(self, mocker, request, x_input_shape): model = AddTensorConvModule() n, c, h, w = x_input_shape @@ -187,7 +175,11 @@ def test__w_conv(self, x_input_shape, mocker): dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) lower_run_compare( - model, [x_input_spec, y_input_spec], graph_verifier, dataset_creator + model, + [x_input_spec, y_input_spec], + graph_verifier, + request, + dataset_creator, ) @pytest.mark.parametrize( @@ -198,13 +190,12 @@ def test__w_conv(self, x_input_shape, mocker): id="2 inputs 4D + 4D.", ), pytest.param( - [ModelInputSpec((1, 4, 5, 67)), ModelInputSpec((1, 8, 5, 1))], - id="2 inputs 4D + 4D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), + [ModelInputSpec((1, 4, 1, 67)), ModelInputSpec((1, 8, 5, 67))], + id="2 inputs 4D + 4D same width.", ), ], ) - def test__w_conv_broadcast(self, input_spec, mocker): + def test__w_conv_broadcast(self, mocker, request, input_spec): model = AddTensorConvModule() graph_verifier = DetailedGraphVerifier( @@ -213,12 +204,16 @@ def test__w_conv_broadcast(self, input_spec, mocker): expected_non_delegated_ops={}, ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, input_spec, graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, ) @pytest.mark.parametrize( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py index 120c3899ed4..3db1158d637 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py @@ -41,16 +41,16 @@ def forward(self, x): class TestAvgPool2D: - def test__basic_nsys_inference(self, mocker): + def test__basic_nsys_inference(self, mocker, request): input_shape = (2, 4, 6, 7) model = AvgPool2dModule(False, 0) graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) - def test__basic_nsys_inference_qat(self, mocker): + def test__basic_nsys_inference_qat(self, mocker, request): input_shape = (2, 9, 6, 15) model = AvgPool2dModule(False, 0) graph_verifier = DetailedGraphVerifier( @@ -61,10 +61,11 @@ def test__basic_nsys_inference_qat(self, mocker): model, input_shape, graph_verifier, + request, use_qat=True, ) - def test__kernel_size_limit(self, mocker): + def test__kernel_size_limit(self, mocker, request): kernel_size = (1, 4096) input_shape = (1, 4) + kernel_size model = AvgPool2dModule(False, 0, kernel_size) @@ -72,7 +73,7 @@ def test__kernel_size_limit(self, mocker): mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) def test__kernel_size_limit_exceeded(self): kernel_size = (1, 4097) # Exceeds the kernel size limit. @@ -87,7 +88,7 @@ def test__kernel_size_limit_exceeded(self): ) assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D]) - def test__stride_limit(self, mocker): + def test__stride_limit(self, mocker, request): stride = 4096 input_shape = (1, 4, 1, 4096) model = AvgPool2dModule(False, 0, 1, stride) @@ -95,7 +96,7 @@ def test__stride_limit(self, mocker): mocker, expected_delegated_ops={AvgPool2D: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) def test__stride_limit_exceeded(self): stride = 4097 # Exceeds the stride limit. @@ -114,7 +115,7 @@ def test__stride_limit_exceeded(self): class TestAvgPool1D: # Just a basic test to verify that the operator gets extended to the 2D variant correctly. - def test__basic_nsys_inference(self, mocker): + def test__basic_nsys_inference(self, mocker, request): input_shape = (2, 4, 6) # The old flow limited the batch size to 1. model = AvgPool1DModule() graph_verifier = DetailedGraphVerifier( @@ -123,4 +124,4 @@ def test__basic_nsys_inference(self, mocker): expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py index 9bb1f30ee60..b28a431e3ca 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py @@ -56,7 +56,7 @@ def forward(self, *inputs: torch.Tensor): class TestCat: - def test__qat(self, mocker, use_qat): + def test__qat(self, mocker, request, use_qat): input_shape = (2, 3, 5) num_inputs = 2 @@ -66,11 +66,11 @@ def test__qat(self, mocker, use_qat): mocker, expected_delegated_ops={Cat: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_shapes, graph_verifier, use_qat=use_qat) + lower_run_compare(model, input_shapes, graph_verifier, request, use_qat=use_qat) @pytest.mark.parametrize("dim", list(range(-3, 3)), ids=lambda dim: f"dim={dim}") @pytest.mark.parametrize("num_inputs", [2, 5], ids=lambda n: f"n={n}") - def test__same_shapes(self, mocker, dim, num_inputs): + def test__same_shapes(self, mocker, request, dim, num_inputs): input_shape = (2, 3, 5) input_shapes = [ModelInputSpec(input_shape) for _ in range(num_inputs)] @@ -79,11 +79,11 @@ def test__same_shapes(self, mocker, dim, num_inputs): mocker, expected_delegated_ops={Cat: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_shapes, graph_verifier) + lower_run_compare(model, input_shapes, graph_verifier, request) @pytest.mark.parametrize("dim", [0, -3, 2, -1], ids=lambda dim: f"dim={dim}") @pytest.mark.parametrize("num_inputs", [2, 5], ids=lambda n: f"n={n}") - def test__same_shapes__channels_first(self, mocker, dim, num_inputs): + def test__same_shapes__channels_first(self, mocker, request, dim, num_inputs): input_shape = (2, 3, 4, 5) input_shapes = [ModelInputSpec(input_shape) for _ in range(num_inputs)] @@ -94,12 +94,12 @@ def test__same_shapes__channels_first(self, mocker, dim, num_inputs): expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shapes, graph_verifier) + lower_run_compare(model, input_shapes, graph_verifier, request) @pytest.mark.parametrize("dim", [0, -1], ids=lambda dim: f"dim={dim}") @pytest.mark.parametrize("rank", [2, 3, 4], ids=lambda rank: f"rank={rank}") @pytest.mark.parametrize("num_inputs", [2, 3], ids=lambda n: f"n={n}") - def test__different_shapes(self, mocker, dim, rank, num_inputs): + def test__different_shapes(self, mocker, request, dim, rank, num_inputs): # The input shapes can only differ in the `dim` dimension. So we can just assign a different one for each input. # e.g. [(2, 3, 4), (3, 3, 4), (4, 3, 4), (5, 3, 4), (6, 3, 4)] base_shape = [i + 2 for i in range(rank)] @@ -113,11 +113,11 @@ def test__different_shapes(self, mocker, dim, rank, num_inputs): mocker, expected_delegated_ops={Cat: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_shapes, graph_verifier) + lower_run_compare(model, input_shapes, graph_verifier, request) @pytest.mark.parametrize("dim", [1, -1], ids=lambda dim: f"dim={dim}") @pytest.mark.parametrize("num_inputs", [2, 5], ids=lambda n: f"n={n}") - def test__different_shapes__channels_first(self, mocker, dim, num_inputs): + def test__different_shapes__channels_first(self, mocker, request, dim, num_inputs): # The input shapes can only differ in the `dim` dimension. So we can just assign a different one for each input. # e.g. [(1, 3, 4, 5), (2, 3, 4, 5)] base_shape = (2, 3, 4, 5) @@ -133,7 +133,7 @@ def test__different_shapes__channels_first(self, mocker, dim, num_inputs): expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shapes, graph_verifier) + lower_run_compare(model, input_shapes, graph_verifier, request) def test__single_input__alone_in_partition__not_delegated(self): # The operator is a noop, and there is no other op in the model. The Neutron Converter would produce an empty @@ -149,7 +149,7 @@ def test__single_input__alone_in_partition__not_delegated(self): ) assert graph_contains_any_of_ops(delegated_ep.graph, [Cat]) - def test__single_input__not_alone_in_partition__delegated(self, mocker): + def test__single_input__not_alone_in_partition__delegated(self, mocker, request): # The operator is a noop, but there is another op in the model, so they are both delegated. input_shape = [ModelInputSpec((2, 3, 4, 5))] @@ -160,4 +160,4 @@ def test__single_input__not_alone_in_partition__delegated(self, mocker): expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py index e0ae44b61f8..248063551af 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py @@ -90,7 +90,7 @@ class TestClamp: pytest.param(0.0, None, id="min = 0, max = None (Relu)"), ], ) - def test_convert_clamp__full_pipeline(self, mocker, min, max, use_qat): + def test_convert_clamp__full_pipeline(self, mocker, request, min, max, use_qat): input_shape = (2, 7, 2) # Indivisible by num_macs model = AddClampModule(min, max) @@ -109,6 +109,7 @@ def test_convert_clamp__full_pipeline(self, mocker, min, max, use_qat): model=model, input_spec=[x_input_spec], dlg_model_verifier=graph_verifier, + request=request, output_comparator=comparator, use_qat=use_qat, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py index 9ffa69139f6..32bbf93fae4 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py @@ -34,7 +34,7 @@ class TestConstantPadND: """ # noinspection PyMethodMayBeStatic - def assert_delegated(self, model, input_shape, mocker, use_qat=False): + def assert_delegated(self, model, input_shape, mocker, request, use_qat=False): graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={ConstantPadND: 1}, @@ -45,15 +45,16 @@ def assert_delegated(self, model, input_shape, mocker, use_qat=False): model, input_shape, graph_verifier, + request, use_qat=use_qat, ) def assert_delegated_and_output_shape_equals( - self, model, input_shape, expected_output_shape, mocker + self, model, input_shape, expected_output_shape, mocker, request ): model_builder_spy = mocker.spy(ModelBuilder, "finish") - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) neutron_ir_subgraph = model_builder_spy.call_args[0][0].get_sub_graph() assert neutron_ir_subgraph.outputs.tmp_outputs[0].shape.vector == list( @@ -74,12 +75,14 @@ def assert_delegated_and_output_shape_equals( pytest.param((1, 2, 3, 4, 5), tuple(range(4)), id="5D, padding W, D"), ], ) - def test__basic_nsys_inference(self, mocker, input_shape, paddings, use_qat): + def test__basic_nsys_inference( + self, mocker, request, input_shape, paddings, use_qat + ): # These test cases are also supported by the old flow. model = ConstantPadNDModule(paddings) - self.assert_delegated(model, input_shape, mocker, use_qat) + self.assert_delegated(model, input_shape, mocker, request, use_qat) - def test__channels_padding(self, mocker): + def test__channels_padding(self, mocker, request): input_shape = (2, 4, 6) # These paddings will be applied to the last dimension, which is the channels as the input is formatless. paddings = (1, 1) @@ -87,25 +90,25 @@ def test__channels_padding(self, mocker): model = ConstantPadNDModule(paddings) self.assert_delegated_and_output_shape_equals( - model, input_shape, expected_output_shape, mocker + model, input_shape, expected_output_shape, mocker, request ) - def test__batch_padding(self, mocker): + def test__batch_padding(self, mocker, request): input_shape = (2, 4, 6) paddings = (0, 0, 0, 0, 1, 1) # Padding applied to the batch dimension. expected_output_shape = (4, 4, 6) # Padded batch. model = ConstantPadNDModule(paddings) self.assert_delegated_and_output_shape_equals( - model, input_shape, expected_output_shape, mocker + model, input_shape, expected_output_shape, mocker, request ) @pytest.mark.parametrize("constant", [0.0, -13.37]) - def test__specific_constant(self, mocker, constant): + def test__specific_constant(self, mocker, request, constant): input_shape = (2, 4, 6) paddings = (1, 1) model = ConstantPadNDModule(paddings, constant) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) @pytest.mark.parametrize( "input_shape, paddings", @@ -115,7 +118,7 @@ def test__specific_constant(self, mocker, constant): pytest.param((1, 2, 6, 8), (0, 1, 2, 3, 1, 1), id="4D, padding H, W"), ], ) - def test__channels_first(self, mocker, input_shape, paddings): + def test__channels_first(self, mocker, request, input_shape, paddings): model = ConstantPadNDConvModule(paddings) graph_verifier = DetailedGraphVerifier( mocker, @@ -123,4 +126,4 @@ def test__channels_first(self, mocker, input_shape, paddings): expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_leaky_relu_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_leaky_relu_converter.py index 81dbe9aa0fb..567cf85ebe5 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_leaky_relu_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_leaky_relu_converter.py @@ -34,7 +34,7 @@ def forward(self, x): class TestLeakyRelu: # noinspection PyMethodMayBeStatic - def assert_delegated(self, model, input_shape, mocker, use_qat=False): + def assert_delegated(self, model, input_shape, mocker, request, use_qat=False): graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={LeakyRelu: 1}, @@ -48,6 +48,7 @@ def assert_delegated(self, model, input_shape, mocker, use_qat=False): model, input_shape, graph_verifier, + request, dataset_creator, use_qat=use_qat, ) @@ -63,28 +64,29 @@ def assert_delegated(self, model, input_shape, mocker, use_qat=False): ], ids=lambda shape: f"{len(shape)}D", ) - def test__default_alpha__input_shapes(self, mocker, input_shape): + def test__default_alpha__input_shapes(self, mocker, request, input_shape): model = LeakyReluModule() - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) - def test__default_alpha__qat(self, mocker, use_qat): + def test__default_alpha__qat(self, mocker, request, use_qat): model = LeakyReluModule() input_shape = (23,) - self.assert_delegated(model, input_shape, mocker, use_qat) + self.assert_delegated(model, input_shape, mocker, request, use_qat) @pytest.mark.parametrize( "alpha", [0.01, 3.14159, 0, 1, float("inf")], ids=lambda alpha: f"alpha = {alpha}", ) - def test__specific_alpha(self, mocker, alpha): + def test__specific_alpha(self, mocker, request, alpha): model = LeakyReluModule(negative_slope=alpha) - self.assert_delegated(model, (23,), mocker) + self.assert_delegated(model, (23,), mocker, request) - def test__inplace(self, mocker): + def test__inplace(self, mocker, request): model = LeakyReluModule(inplace=True) self.assert_delegated( model, (23,), mocker, + request, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_log_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_log_converter.py index 3e1d066103a..0b7fe88cffc 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_log_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_log_converter.py @@ -35,7 +35,7 @@ def forward(self, x): class TestLog: - def test__basic_nsys_inference(self, mocker): + def test__basic_nsys_inference(self, mocker, request): # Use 256 elements so that, after quantization to int8, the input can # cover the full discrete range [-128, 127]. # The dataset is generated as a linear float ramp and later quantized, @@ -49,6 +49,7 @@ def test__basic_nsys_inference(self, mocker): model, input_shape, graph_verifier, + request, dataset_creator=LinearRampDatasetCreator(low=0.0, high=1.0), ) @@ -60,7 +61,7 @@ def test__basic_nsys_inference(self, mocker): pytest.param((1, 3, 16, 16), id="4D"), ], ) - def test__basic_nsys_inference__qat(self, mocker, input_shape, use_qat): + def test__basic_nsys_inference__qat(self, mocker, request, input_shape, use_qat): model = LogModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={Log: 1}, expected_non_delegated_ops={} @@ -69,6 +70,7 @@ def test__basic_nsys_inference__qat(self, mocker, input_shape, use_qat): model, input_shape, graph_verifier, + request, dataset_creator=RandomDatasetCreator(low=1.0, high=10.0), use_qat=use_qat, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index c95b3cd3b8d..55a47146bfc 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -51,14 +51,14 @@ def reseed_model_per_test_run(): class TestMaxPool2D: # noinspection PyMethodMayBeStatic - def assert_delegated(self, model, input_shape, mocker): + def assert_delegated(self, model, input_shape, mocker, request): graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={MaxPool2DWithIndices: 1, GetItem: 1}, expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) # noinspection PyMethodMayBeStatic def assert_not_delegated(self, model, input_shape): @@ -70,12 +70,12 @@ def assert_not_delegated(self, model, input_shape): ) assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2DWithIndices]) - def test__basic_nsys_inference(self, mocker): + def test__basic_nsys_inference(self, mocker, request): input_shape = (2, 4, 6, 7) # The old flow limited the batch size to 1. model = MaxPool2dModule() - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) - def test__basic_nsys_inference_qat(self, mocker): + def test__basic_nsys_inference_qat(self, mocker, request): input_shape = (2, 11, 7, 16) # The old flow limited the batch size to 1. model = MaxPool2dModule() graph_verifier = DetailedGraphVerifier( @@ -88,20 +88,21 @@ def test__basic_nsys_inference_qat(self, mocker): model, input_shape, graph_verifier, + request, use_qat=True, ) - def test__large_kernel_size(self, mocker): + def test__large_kernel_size(self, mocker, request): kernel_size = (1, 5000) input_shape = (1, 4) + kernel_size model = MaxPool2dModule(kernel_size, stride=1) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) - def test__stride_limit__no_padding(self, mocker): + def test__stride_limit__no_padding(self, mocker, request): stride = 4096 input_shape = (1, 4, 1, 4096) model = MaxPool2dModule(1, stride=stride) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) def test__stride_limit_exceeded__no_padding(self): stride = 4097 # Exceeds the stride limit. @@ -109,12 +110,12 @@ def test__stride_limit_exceeded__no_padding(self): model = MaxPool2dModule(1, stride=stride) self.assert_not_delegated(model, input_shape) - def test__stride_limit__padding(self, mocker): + def test__stride_limit__padding(self, mocker, request): padding = 1 stride = 4096 input_shape = (1, 2, 3, stride) model = MaxPool2dModule(3, stride=stride, padding=padding) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) def test__stride_limit_exceeded__padding(self): padding = 1 @@ -126,7 +127,7 @@ def test__stride_limit_exceeded__padding(self): @pytest.mark.skip( reason="Large padding requires large kernel size which results in an extremely slow test." ) - def test__padding_limit(self, mocker): + def test__padding_limit(self, mocker, request): # As the padding is added wia a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded # value. But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited # to 4096, padding of 2048 is the limit. @@ -134,16 +135,16 @@ def test__padding_limit(self, mocker): kernel_size = padding * 2 input_shape = (1, 1, 2, 3) model = MaxPool2dModule(kernel_size, padding=padding) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) - def test__padding__max_pool_limit_exceeded(self, mocker): + def test__padding__max_pool_limit_exceeded(self, mocker, request): # NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no # limit. This tests ensures the `MaxPool` padding limit is not a problem. padding = 33 kernel_size = padding * 2 input_shape = (1, 2, 3, 4) model = MaxPool2dModule(kernel_size, padding=padding) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) def test__padding_to_kernel_ratio_exceeded(self): # Both PyTorch and Neutron require the padding to be at most half of the kernel size. @@ -160,7 +161,7 @@ def test__padding_to_kernel_ratio_exceeded(self): class TestMaxPool1D: # Just a basic test to verify that the operator gets extended to the 2D variant correctly. - def test__basic_nsys_inference__view_not_delegated(self, mocker): + def test__basic_nsys_inference__view_not_delegated(self, mocker, request): input_shape = (2, 4, 6) # The old flow limited the batch size to 1. model = MaxPool1DModule() @@ -170,4 +171,4 @@ def test__basic_nsys_inference__view_not_delegated(self, mocker): expected_non_delegated_ops={}, ) - lower_run_compare(model, input_shape, graph_verifier) + lower_run_compare(model, input_shape, graph_verifier, request) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py index 8195581c0f6..a5bb05d6763 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py @@ -69,6 +69,7 @@ def assert_delegated( model, input_shape, mocker, + request, use_qat=False, atol=None, expected_delegated_ops=None, @@ -92,6 +93,7 @@ def assert_delegated( model, input_shape, graph_verifier, + request, dataset_creator, output_comparator, use_qat=use_qat, @@ -111,10 +113,10 @@ def assert_not_delegated(self, model, input_shape): def keep_dim(self, request): return request.param - def test__basic_nsys_inference__qat(self, mocker, use_qat, keep_dim): + def test__basic_nsys_inference__qat(self, mocker, request, use_qat, keep_dim): input_shape = (23,) model = MeanDimModule(0, keep_dim) - self.assert_delegated(model, input_shape, mocker, use_qat=use_qat) + self.assert_delegated(model, input_shape, mocker, request, use_qat=use_qat) @pytest.mark.parametrize( "input_shape, dim", @@ -128,12 +130,12 @@ def test__basic_nsys_inference__qat(self, mocker, use_qat, keep_dim): pytest.param((3, 1, 4, 1, 5), 0, id="5D, dim = 0."), ], ) - def test__single_dims(self, mocker, input_shape, dim, keep_dim): + def test__single_dims(self, mocker, request, input_shape, dim, keep_dim): model = MeanDimModule(dim, keep_dim) # Relatively large error, but it is actually equal to the output scale, so it is a single bit error. # TODO Replace with quantized dataset testing and `atol = 1`. atol = 0.014 - self.assert_delegated(model, input_shape, mocker, atol=atol) + self.assert_delegated(model, input_shape, mocker, request, atol=atol) @pytest.mark.parametrize( "input_shape, dim", @@ -145,12 +147,12 @@ def test__single_dims(self, mocker, input_shape, dim, keep_dim): pytest.param((3, 1, 4, 1, 5), (3, -5, -4), id="5D, dim = (3, -5 ,-4)."), ], ) - def test__tuple_dims(self, mocker, input_shape, dim, keep_dim): + def test__tuple_dims(self, mocker, request, input_shape, dim, keep_dim): model = MeanDimModule(dim, keep_dim) # Relatively large error, but it is actually equal to the output scale, so it is a single bit error. # TODO Replace with quantized dataset testing and `atol = 1`. atol = 0.015 - self.assert_delegated(model, input_shape, mocker, atol=atol) + self.assert_delegated(model, input_shape, mocker, request, atol=atol) @pytest.mark.parametrize( "input_shape, dim", @@ -171,13 +173,14 @@ def test__noop__only_node__not_delegated(self, input_shape, dim): pytest.param((3, 1, 4, 1, 5), -2, id="5D, dim = -2."), ], ) - def test__noop__not_only_node__delegated(self, mocker, input_shape, dim): + def test__noop__not_only_node__delegated(self, mocker, request, input_shape, dim): keep_dim = True # Reduction over a dimension of size `1` with `keep_dim=True` is a no-op. model = MeanDimAddModule(dim, keep_dim) self.assert_delegated( model, input_shape, mocker, + request, expected_delegated_ops={MeanDim: 1, AddTensor: 1}, ) @@ -188,13 +191,15 @@ def test__noop__not_only_node__delegated(self, mocker, input_shape, dim): pytest.param((3, 1, 4, 1, 5), -2, id="5D, dim = -2."), ], ) - def test__no_reduction__keepdim_false__delegated(self, mocker, input_shape, dim): + def test__no_reduction__keepdim_false__delegated( + self, mocker, request, input_shape, dim + ): # These cases reduce over a dimension of size 1. # When `keep_dim=True` the node is a noop, and it's not delegated (see `test__noop__only_node__not_delegated`), # but with `keep_dim=False` it changes the shape so it's not a noop and is therefore delegated successfully. keep_dim = False model = MeanDimModule(dim, keep_dim) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) @pytest.mark.parametrize( "input_shape, dim", @@ -214,7 +219,9 @@ def test__no_reduction__keepdim_false__delegated(self, mocker, input_shape, dim) ], ids=lambda kd: f"keep_dim={kd}", ) - def test__channels_first__keep_dim__true(self, mocker, input_shape, dim, keep_dim): + def test__channels_first__keep_dim__true( + self, mocker, request, input_shape, dim, keep_dim + ): # Just 1 test case to verify correct handling of the `dim`. # Most cases fall into the single bit error case, and since this test uses 2 operators, the error accumulates # and the final error is larger. We cannot with 100% certainty say that the error is only caused by the single @@ -225,5 +232,6 @@ def test__channels_first__keep_dim__true(self, mocker, input_shape, dim, keep_di model, input_shape, mocker, + request, expected_delegated_ops={MaxPool2DWithIndices: 1, GetItem: 1, MeanDim: 1}, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py index 897c3efd850..d112ff1e1ac 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mul_tensor_converter.py @@ -41,7 +41,7 @@ class TestMulTensor: pytest.param((1, 4, 8, 8), id="4D."), ], ) - def test__basic_nsys_inference(self, x_input_shape, mocker): + def test__basic_nsys_inference(self, mocker, request, x_input_shape): x_input_spec = ModelInputSpec(x_input_shape) model = MulTensorModule() graph_verifier = DetailedGraphVerifier( @@ -52,6 +52,7 @@ def test__basic_nsys_inference(self, x_input_shape, mocker): model, [x_input_spec, x_input_spec], graph_verifier, + request, ) @pytest.mark.parametrize( @@ -61,7 +62,7 @@ def test__basic_nsys_inference(self, x_input_shape, mocker): pytest.param((1, 4, 8, 8), id="4D."), ], ) - def test__basic_nsys_inference_qat(self, x_input_shape, mocker): + def test__basic_nsys_inference_qat(self, mocker, request, x_input_shape): x_input_spec = ModelInputSpec(x_input_shape) model = MulTensorModule() graph_verifier = DetailedGraphVerifier( @@ -72,6 +73,7 @@ def test__basic_nsys_inference_qat(self, x_input_shape, mocker): model, [x_input_spec, x_input_spec], graph_verifier, + request, use_qat=True, ) @@ -90,13 +92,13 @@ def test__basic_nsys_inference_qat(self, x_input_shape, mocker): ), ], ) - def test__correct_broadcast(self, input_spec, mocker): + def test__correct_broadcast(self, input_spec, mocker, request): model = MulTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={MulTensor: 1}, expected_non_delegated_ops={} ) - lower_run_compare(model, input_spec, graph_verifier) + lower_run_compare(model, input_spec, graph_verifier, request) @pytest.mark.parametrize( "input_spec", @@ -134,7 +136,7 @@ def test__incorrect_broadcast(self, input_spec): ), ], ) - def test__w_conv(self, x_input_shape, mocker): + def test__w_conv(self, mocker, request, x_input_shape): model = MulTensorConvModule() n, c, h, w = x_input_shape @@ -151,6 +153,7 @@ def test__w_conv(self, x_input_shape, mocker): model, [x_input_spec, y_input_spec], graph_verifier, + request, ) @pytest.mark.parametrize( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py index 31436a3f200..bdfd1e9da25 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py @@ -77,7 +77,13 @@ def forward(self, x): class TestPermuteCopy: # noinspection PyMethodMayBeStatic def assert_delegated( - self, model, input_shape, mocker, expected_delegated_ops=None, use_qat=False + self, + model, + input_shape, + mocker, + request, + expected_delegated_ops=None, + use_qat=False, ): graph_verifier = DetailedGraphVerifier( mocker, @@ -89,6 +95,7 @@ def assert_delegated( model, input_shape, graph_verifier, + request, use_qat=use_qat, ) @@ -115,18 +122,18 @@ def _special_4d_permutations() -> list[ParameterSet]: pytest.param((3, 2, 1, 0), id="reverse"), ] - def test__qat(self, mocker, use_qat): + def test__qat(self, mocker, request, use_qat): input_shape = (2, 3, 5, 7) permutation = (0, 2, 3, 1) # NCHW -> NHWC model = PermuteModule(permutation) - self.assert_delegated(model, input_shape, mocker, use_qat=use_qat) + self.assert_delegated(model, input_shape, mocker, request, use_qat=use_qat) @pytest.mark.parametrize( "permutation", _all_permutations_for_rank(3), ids=lambda perm: f"permutation = {perm}", ) - def test__all_permutations__3d(self, mocker, permutation: tuple[int]): + def test__all_permutations__3d(self, mocker, request, permutation: tuple[int]): # Avoid dimensions of size 1 and multiples of `num_macs` for a thorough test. input_shape = (2, 3, 5) model = PermuteModule(permutation) @@ -135,14 +142,14 @@ def test__all_permutations__3d(self, mocker, permutation: tuple[int]): # would result in an empty graph, which is not allowed. Therefore, it's not delegated. self.assert_not_delegated(model, input_shape) else: - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) @pytest.mark.parametrize( "permutation", _all_permutations_for_rank(4), ids=lambda perm: f"permutation = {perm}", ) - def test__all_permutations__4d(self, mocker, permutation: tuple[int]): + def test__all_permutations__4d(self, mocker, request, permutation: tuple[int]): # Avoid dimensions of size 1 and multiples of `num_macs` for a thorough test. input_shape = (2, 3, 5, 7) model = PermuteModule(permutation) @@ -151,43 +158,55 @@ def test__all_permutations__4d(self, mocker, permutation: tuple[int]): # would result in an empty graph, which is not allowed. Therefore, it's not delegated. self.assert_not_delegated(model, input_shape) else: - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) @pytest.mark.parametrize("permutation", _special_4d_permutations()) def test__all_permutations__4d__channels_first_input( - self, mocker, permutation: tuple[int] + self, mocker, request, permutation: tuple[int] ): # Avoid dimensions of size 1 and multiples of `num_macs` for a thorough test. input_shape = (2, 3, 5, 7) model = MaxPoolPermuteModule(permutation) expected_delegated_ops = {MaxPool2DWithIndices: 1, GetItem: 1, PermuteCopy: 1} self.assert_delegated( - model, input_shape, mocker, expected_delegated_ops=expected_delegated_ops + model, + input_shape, + mocker, + request, + expected_delegated_ops=expected_delegated_ops, ) @pytest.mark.parametrize("permutation", _special_4d_permutations()) def test__all_permutations__4d__channels_first_output( - self, mocker, permutation: tuple[int] + self, mocker, request, permutation: tuple[int] ): # Avoid dimensions of size 1 and multiples of `num_macs` for a thorough test. input_shape = (2, 3, 5, 7) model = PermuteMaxPoolModule(permutation) expected_delegated_ops = {MaxPool2DWithIndices: 1, GetItem: 1, PermuteCopy: 1} self.assert_delegated( - model, input_shape, mocker, expected_delegated_ops=expected_delegated_ops + model, + input_shape, + mocker, + request, + expected_delegated_ops=expected_delegated_ops, ) @pytest.mark.parametrize("perm1", _special_4d_permutations()) @pytest.mark.parametrize("perm2", _special_4d_permutations()) def test__all_permutations__4d__channels_first_io( - self, mocker, perm1: tuple[int], perm2: tuple[int] + self, mocker, request, perm1: tuple[int], perm2: tuple[int] ): # Avoid dimensions of size 1 and multiples of `num_macs` for a thorough test. input_shape = (2, 3, 5, 7) model = PermuteMaxPoolPermuteModule(perm1, perm2) expected_delegated_ops = {MaxPool2DWithIndices: 1, GetItem: 1, PermuteCopy: 2} self.assert_delegated( - model, input_shape, mocker, expected_delegated_ops=expected_delegated_ops + model, + input_shape, + mocker, + request, + expected_delegated_ops=expected_delegated_ops, ) @pytest.mark.parametrize( @@ -200,7 +219,7 @@ def test__all_permutations__4d__channels_first_io( pytest.param((4, 2, 3, 0, 1), id="perm = (4, 2, 3, 0, 1)"), ], ) - def test__5d(self, mocker, permutation): + def test__5d(self, mocker, request, permutation): # Avoid dimensions of size 1 and multiples of `num_macs` for a thorough test. input_shape = (2, 3, 5, 3, 5) model = PermuteModule(permutation) @@ -209,4 +228,4 @@ def test__5d(self, mocker, permutation): # would result in an empty graph, which is not allowed. Therefore, it's not delegated. self.assert_not_delegated(model, input_shape) else: - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py index ab42560f075..ca2abd18f32 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py @@ -62,7 +62,7 @@ def forward(self, x): return self.relu(x) -class TestReLUNewNeutronFlow: +class TestReLU: @pytest.mark.parametrize( ["model", "input_shape"], [ @@ -98,7 +98,7 @@ class TestReLUNewNeutronFlow: ), ], ) - def test_relu_conversion__full_pipeline(self, mocker, model, input_shape): + def test_relu_conversion__full_pipeline(self, mocker, request, model, input_shape): model = model() # Avoid model creation at import time is_conv_module = not hasattr(model, "linear") @@ -108,19 +108,20 @@ def test_relu_conversion__full_pipeline(self, mocker, model, input_shape): {Convolution: 1, Relu: 1} if is_conv_module else {AddMm: 1, Relu: 1} ), expected_non_delegated_ops={}, - ops_to_ignore=[ + ops_to_ignore={ PermuteCopy, ViewCopy, QuantizePerTensor, DequantizePerTensor, DequantizePerChannel, - ], + }, ) lower_run_compare( model, input_shape, graph_verifier, + request, ) @pytest.mark.parametrize( @@ -136,7 +137,9 @@ def test_relu_conversion__full_pipeline(self, mocker, model, input_shape): ), ], ) - def test_relu_conversion__non_delegated_with_old_flow(self, mocker, input_shape): + def test_relu_conversion__non_delegated_with_old_flow( + self, mocker, request, input_shape + ): verifier = DetailedGraphVerifier( mocker=mocker, expected_delegated_ops={Relu: 1}, @@ -146,8 +149,9 @@ def test_relu_conversion__non_delegated_with_old_flow(self, mocker, input_shape) lower_run_compare( ReLUModule(), input_shape, - dlg_model_verifier=verifier, - dataset_creator=RandomDatasetCreator(low=-1, high=1), + verifier, + request, + RandomDatasetCreator(low=-1, high=1), ) @pytest.mark.parametrize( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py index 75a32254a1d..bdd41d1eab0 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py @@ -30,7 +30,9 @@ def reseed_model_per_test_run(): class TestSigmoid: # noinspection PyMethodMayBeStatic - def assert_delegated(self, model, input_shape, mocker, use_qat=False, atol=None): + def assert_delegated( + self, model, input_shape, mocker, request, use_qat=False, atol=None + ): graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={Sigmoid: 1}, @@ -47,15 +49,16 @@ def assert_delegated(self, model, input_shape, mocker, use_qat=False, atol=None) model, input_shape, graph_verifier, + request, dataset_creator, output_comparator, use_qat=use_qat, ) - def test__basic_nsys_inference__qat(self, mocker, use_qat): + def test__basic_nsys_inference__qat(self, mocker, request, use_qat): input_shape = (23,) model = nn.Sigmoid() - self.assert_delegated(model, input_shape, mocker, use_qat=use_qat) + self.assert_delegated(model, input_shape, mocker, request, use_qat=use_qat) @pytest.mark.parametrize( "input_shape", @@ -68,13 +71,13 @@ def test__basic_nsys_inference__qat(self, mocker, use_qat): ], ids=lambda shape: f"{len(shape)}D", ) - def test__input_shapes(self, mocker, input_shape): + def test__input_shapes(self, mocker, request, input_shape): model = nn.Sigmoid() output_scale = 1.0 / 256.0 lowering_spy = mocker.spy(NeutronPartitioner, "partition") self.assert_delegated( - model, input_shape, mocker, atol=output_scale + model, input_shape, mocker, request, atol=output_scale ) # Allow single bit error. # Verify that the `atol` is indeed equal to the output scale. diff --git a/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py index cb0ec09bcce..98cc924ee85 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_slice_tensor_converter.py @@ -41,7 +41,9 @@ def _slice_id(prefix, input_shape, dims, starts, ends): return f"{prefix}rank={len(input_shape)}_dims={str(dims)}_starts={str(starts)}_ends={str(ends)}" @staticmethod - def assert_delegated_and_correct(model, input_shape, num_slices, mocker, use_qat): + def assert_delegated_and_correct( + model, input_shape, num_slices, mocker, request, use_qat + ): graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={SliceCopy: num_slices}, @@ -54,6 +56,7 @@ def assert_delegated_and_correct(model, input_shape, num_slices, mocker, use_qat model, input_shape, graph_verifier, + request, dataset, comparator, use_qat=use_qat, @@ -182,12 +185,14 @@ def assert_not_delegated(model, input_shape): ), ], ) - def test_nsys_inference__basic(self, input_shape, dims, starts, ends, mocker): + def test_nsys_inference__basic( + self, input_shape, dims, starts, ends, mocker, request + ): model = SliceTensorModule(dims, starts, ends) num_slices = len(dims) self.assert_delegated_and_correct( - model, input_shape, num_slices, mocker, use_qat=False + model, input_shape, num_slices, mocker, request, use_qat=False ) @pytest.mark.parametrize( @@ -209,7 +214,9 @@ def test_nsys_inference__basic(self, input_shape, dims, starts, ends, mocker): ), ], ) - def test_nsys_inference__reduction(self, input_shape, dims, starts, ends, mocker): + def test_nsys_inference__reduction( + self, input_shape, dims, starts, ends, mocker, request + ): model = SliceTensorModule(dims, starts, ends) slice_lengths = [e - s for s, e in zip(starts, ends)] @@ -219,7 +226,7 @@ def test_nsys_inference__reduction(self, input_shape, dims, starts, ends, mocker else: num_slices = len(dims) self.assert_delegated_and_correct( - model, input_shape, num_slices, mocker, use_qat=False + model, input_shape, num_slices, mocker, request, use_qat=False ) @pytest.mark.parametrize( @@ -241,12 +248,14 @@ def test_nsys_inference__reduction(self, input_shape, dims, starts, ends, mocker ), ], ) - def test_nsys_inference__clipped(self, input_shape, dims, starts, ends, mocker): + def test_nsys_inference__clipped( + self, input_shape, dims, starts, ends, mocker, request + ): model = SliceTensorModule(dims, starts, ends) num_slices = len(dims) self.assert_delegated_and_correct( - model, input_shape, num_slices, mocker, use_qat=False + model, input_shape, num_slices, mocker, request, use_qat=False ) @pytest.mark.parametrize( @@ -269,13 +278,13 @@ def test_nsys_inference__clipped(self, input_shape, dims, starts, ends, mocker): ], ) def test_nsys_inference__normalization( - self, input_shape, dims, starts, ends, mocker + self, input_shape, dims, starts, ends, mocker, request ): model = SliceTensorModule(dims, starts, ends) num_slices = len(dims) self.assert_delegated_and_correct( - model, input_shape, num_slices, mocker, use_qat=False + model, input_shape, num_slices, mocker, request, use_qat=False ) @pytest.mark.parametrize( @@ -304,12 +313,14 @@ def test_nsys_inference__normalization( ), ], ) - def test_nsys_inference__big(self, input_shape, dims, starts, ends, mocker): + def test_nsys_inference__big( + self, input_shape, dims, starts, ends, mocker, request + ): model = SliceTensorModule(dims, starts, ends) num_slices = len(dims) self.assert_delegated_and_correct( - model, input_shape, num_slices, mocker, use_qat=False + model, input_shape, num_slices, mocker, request, use_qat=False ) @pytest.mark.parametrize( @@ -336,7 +347,7 @@ def test_nsys_inference__identity(self, input_shape, dims, starts, ends): self.assert_model_without_slices(model, input_shape) - def test_nsys_inference__with_conv(self, mocker): + def test_nsys_inference__with_conv(self, mocker, request): input_shape = (11, 13, 5, 7) in_channels = input_shape[1] out_channels = 19 @@ -360,12 +371,13 @@ def test_nsys_inference__with_conv(self, mocker): model, input_shape, graph_verifier, + request, dataset, comparator, use_qat=False, ) - def test_nsys_inference__qat(self, mocker): + def test_nsys_inference__qat(self, mocker, request): input_shape = (7, 13, 7, 9) dims = (0, 1, 2, 3) starts = (1, 2, 3, 2) @@ -375,5 +387,5 @@ def test_nsys_inference__qat(self, mocker): num_slices = len(dims) self.assert_delegated_and_correct( - model, input_shape, num_slices, mocker, use_qat=True + model, input_shape, num_slices, mocker, request, use_qat=True ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py index 9638f8fe0ec..e71ff7e8af5 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py @@ -16,6 +16,9 @@ ) from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.model_output_comparator import ( + AllCloseOutputComparator, +) from executorch.backends.nxp.tests.models import SubTensorConvModule, SubTensorModule from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.ops_aliases import ( @@ -38,76 +41,50 @@ class TestSubTensor: [ pytest.param((1,), id="1D."), pytest.param((6, 5), id="2D."), + pytest.param((6, 82), id="2D alt."), pytest.param((1, 4, 7), id="3D."), - pytest.param( - (6, 82), - id="2D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - pytest.param( - (1, 68, 7), - id="3D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - pytest.param( - (2, 4, 3, 15), - id="4D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - pytest.param( - (1, 4, 9, 11, 4), - id="5D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), + pytest.param((1, 68, 7), id="3D alt."), + pytest.param((2, 4, 3, 15), id="4D."), + pytest.param((1, 4, 9, 11, 4), id="5D."), ], ) - def test__basic_nsys_inference(self, x_input_shape, mocker): + def test__basic_nsys_inference(self, mocker, request, x_input_shape): x_input_spec = ModelInputSpec(x_input_shape) model = SubTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={} ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, [x_input_spec, x_input_spec], graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, ) - @pytest.mark.parametrize( - "x_input_shape", - [ - pytest.param((1,), id="1D."), - pytest.param((6, 5), id="2D."), - pytest.param((2, 4, 3, 15), id="4D."), - pytest.param( - (1, 4, 7), - id="3D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - pytest.param( - (1, 4, 9, 11, 4), - id="5D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), - ), - ], - ) - def test__basic_nsys_inference_qat(self, x_input_shape, mocker): - x_input_spec = ModelInputSpec(x_input_shape) + def test__basic_nsys_inference_qat(self, mocker, request): + x_input_spec = ModelInputSpec((2, 4, 3, 15)) model = SubTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={} ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, [x_input_spec, x_input_spec], graph_verifier, + request, dataset_creator, + comparator, use_qat=True, + remove_quant_io_ops=True, ) @pytest.mark.parametrize( @@ -117,32 +94,34 @@ def test__basic_nsys_inference_qat(self, x_input_shape, mocker): [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D." ), pytest.param( - [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D + 2D." + [ModelInputSpec((69, 73)), ModelInputSpec((1, 73))], + id="2 inputs 2D alt.", ), pytest.param( - [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))], - id="2 inputs 3D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), + [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 1D + 2D." ), pytest.param( - [ModelInputSpec((69, 73)), ModelInputSpec((1, 73))], - id="2 inputs 2D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), + [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))], + id="2 inputs 3D.", ), ], ) - def test__broadcast(self, input_spec, mocker): + def test__broadcast(self, mocker, request, input_spec): model = SubTensorModule() graph_verifier = DetailedGraphVerifier( mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={} ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, input_spec, graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, ) @pytest.mark.parametrize( @@ -181,7 +160,7 @@ def test__broadcast_unsupported(self, input_spec): ), ], ) - def test__w_conv(self, x_input_shape, mocker): + def test__w_conv(self, mocker, request, x_input_shape): model = SubTensorConvModule() n, c, h, w = x_input_shape @@ -199,6 +178,7 @@ def test__w_conv(self, x_input_shape, mocker): model, [x_input_spec, y_input_spec], graph_verifier, + request, dataset_creator, ) @@ -211,12 +191,11 @@ def test__w_conv(self, x_input_shape, mocker): ), pytest.param( [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 8, 5, 1))], - id="2 inputs 4D + 4D incorrect.", - marks=pytest.mark.xfail(reason="AIR-14602: incorrect results"), + id="2 inputs 4D + 4D same height.", ), ], ) - def test__w_conv_broadcast(self, input_spec, mocker): + def test__w_conv_broadcast(self, mocker, request, input_spec): model = SubTensorConvModule() graph_verifier = DetailedGraphVerifier( mocker, @@ -224,12 +203,16 @@ def test__w_conv_broadcast(self, input_spec, mocker): expected_non_delegated_ops={}, ) dataset_creator = RandomDatasetCreator(low=-1.0, high=1.0) + comparator = AllCloseOutputComparator(atol=1) lower_run_compare( model, input_spec, graph_verifier, + request, dataset_creator, + comparator, + remove_quant_io_ops=True, ) @pytest.mark.parametrize( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py index 6336308e40b..51b7ee484a7 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py @@ -36,6 +36,7 @@ def assert_delegated( model, input_shape, mocker, + request, use_qat=False, expected_delegated_ops=None, ): @@ -55,6 +56,7 @@ def assert_delegated( model, input_shape, graph_verifier, + request, dataset_creator, use_qat=use_qat, ) @@ -63,10 +65,10 @@ def assert_delegated( def inplace(self, request): return request.param - def test__qat__inplace(self, mocker, use_qat, inplace): + def test__qat__inplace(self, mocker, request, use_qat, inplace): shape = (23,) model = TanhModule(inplace) - self.assert_delegated(model, shape, mocker, use_qat=use_qat) + self.assert_delegated(model, shape, mocker, request, use_qat=use_qat) @pytest.mark.parametrize( "shape", @@ -79,16 +81,20 @@ def test__qat__inplace(self, mocker, use_qat, inplace): ], ids=lambda shape: f"{len(shape)}D", ) - def test__shapes(self, mocker, shape): + def test__shapes(self, mocker, request, shape): model = TanhModule() - self.assert_delegated(model, shape, mocker) + self.assert_delegated(model, shape, mocker, request) - def test__with_convolution(self, mocker): + def test__with_convolution(self, mocker, request): input_shape = (1, 3, 12, 16) channels = input_shape[1] model = Conv2dWithActivation( activation=torch.tanh, in_channels=channels, out_channels=channels ) self.assert_delegated( - model, input_shape, mocker, expected_delegated_ops={Tanh: 1, Convolution: 1} + model, + input_shape, + mocker, + request, + expected_delegated_ops={Tanh: 1, Convolution: 1}, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_upsample_bilinear2d.py b/backends/nxp/tests/ir/converter/node_converter/test_upsample_bilinear2d.py index c4a698f4bfb..f9b2269751f 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_upsample_bilinear2d.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_upsample_bilinear2d.py @@ -59,6 +59,7 @@ def assert_delegated( model, input_shape, mocker, + request, use_qat=False, atol=None, expected_delegated_ops=None, @@ -82,6 +83,7 @@ def assert_delegated( model, input_shape, graph_verifier, + request, dataset_creator, output_comparator, use_qat=use_qat, @@ -96,21 +98,25 @@ def assert_not_delegated(self, model, input_shape): ) assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleBilinear2D]) - def test__qat__align_corners(self, mocker, use_qat): + def test__qat__align_corners(self, mocker, request, use_qat): align_corners = True input_shape = (1, 2, 3, 4) output_size = (5, 7) model = UpsampleBilinearModule(size=output_size, align_corners=align_corners) atol = 0.015 # ~= output scale -> single bit error. - self.assert_delegated(model, input_shape, mocker, use_qat=use_qat, atol=atol) + self.assert_delegated( + model, input_shape, mocker, request, use_qat=use_qat, atol=atol + ) - def test__qat__not_align_corners(self, mocker, use_qat): + def test__qat__not_align_corners(self, mocker, request, use_qat): align_corners = False input_shape = (1, 2, 3, 4) output_size = (6, 8) model = UpsampleBilinearModule(size=output_size, align_corners=align_corners) atol = 0.015 # ~= output scale -> single bit error. - self.assert_delegated(model, input_shape, mocker, use_qat=use_qat, atol=atol) + self.assert_delegated( + model, input_shape, mocker, request, use_qat=use_qat, atol=atol + ) @pytest.mark.parametrize( "input_shape, output_size", @@ -125,11 +131,13 @@ def test__qat__not_align_corners(self, mocker, use_qat): pytest.param((2, 2, 3, 4), (24, 8), id="batch=2, scale_h=8, scale_w=2"), ], ) - def test__not_align_corners__output_size(self, mocker, input_shape, output_size): + def test__not_align_corners__output_size( + self, mocker, request, input_shape, output_size + ): align_corners = False model = UpsampleBilinearModule(size=output_size, align_corners=align_corners) atol = 0.016 # ~= output scale -> single bit error. - self.assert_delegated(model, input_shape, mocker, atol=atol) + self.assert_delegated(model, input_shape, mocker, request, atol=atol) def test__not_align_corners__output_size__unsupported(self): align_corners = False @@ -151,11 +159,11 @@ def test__not_align_corners__output_size__unsupported(self): pytest.param((2, 2, 3, 4), (2, 8), id="batch=2, scale_h=2, scale_w=8"), ], ) - def test__not_align_corners__scales(self, mocker, input_shape, scale): + def test__not_align_corners__scales(self, mocker, request, input_shape, scale): align_corners = False model = UpsampleBilinearModule(scale=scale, align_corners=align_corners) atol = 0.016 # ~= output scale -> single bit error. - self.assert_delegated(model, input_shape, mocker, atol=atol) + self.assert_delegated(model, input_shape, mocker, request, atol=atol) def test__not_align_corners__scales__unsupported(self): align_corners = False @@ -183,11 +191,13 @@ def test__not_align_corners__scales__unsupported(self): ), ], ) - def test__align_corners__output_size(self, mocker, input_shape, output_size): + def test__align_corners__output_size( + self, mocker, request, input_shape, output_size + ): align_corners = True model = UpsampleBilinearModule(size=output_size, align_corners=align_corners) atol = 0.016 # ~= output scale -> single bit error. - self.assert_delegated(model, input_shape, mocker, atol=atol) + self.assert_delegated(model, input_shape, mocker, request, atol=atol) def test__align_corners__output_size__unsupported(self): align_corners = True @@ -240,11 +250,11 @@ def test__align_corners__output_size__input_size_equal_to_one(self): ), ], ) - def test__align_corners__scales(self, mocker, input_shape, scale): + def test__align_corners__scales(self, mocker, request, input_shape, scale): align_corners = True model = UpsampleBilinearModule(scale=scale, align_corners=align_corners) atol = 0.016 # ~= output scale -> single bit error. - self.assert_delegated(model, input_shape, mocker, atol=atol) + self.assert_delegated(model, input_shape, mocker, request, atol=atol) def test__align_corners__scales__unsupported(self): align_corners = True @@ -259,7 +269,7 @@ def test__noop__alone_in_partition__not_delegated(self): model = UpsampleBilinearModule(scale=scale) self.assert_not_delegated(model, input_shape) - def test__noop__not_alone_in_partition__delegated(self, mocker): + def test__noop__not_alone_in_partition__delegated(self, mocker, request): input_shape = (1, 2, 3, 4) scale = 1 model = UpsampleBilinearAddModule(scale=scale) @@ -267,5 +277,6 @@ def test__noop__not_alone_in_partition__delegated(self, mocker): model, input_shape, mocker, + request, expected_delegated_ops={UpsampleBilinear2D: 1, AddTensor: 1}, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_upsample_nearest2d.py b/backends/nxp/tests/ir/converter/node_converter/test_upsample_nearest2d.py index 438a580f6e8..b3e28a7b2f8 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_upsample_nearest2d.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_upsample_nearest2d.py @@ -53,6 +53,7 @@ def assert_delegated( model, input_shape, mocker, + request, use_qat=False, expected_delegated_ops=None, ): @@ -72,6 +73,7 @@ def assert_delegated( model, input_shape, graph_verifier, + request, dataset_creator, use_qat=use_qat, ) @@ -85,11 +87,11 @@ def assert_not_delegated(self, model, input_shape): ) assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleNearest2D]) - def test__qat(self, mocker, use_qat): + def test__qat(self, mocker, request, use_qat): input_shape = (1, 2, 3, 4) output_size = (6, 8) model = UpsampleNearestModule(size=output_size) - self.assert_delegated(model, input_shape, mocker, use_qat=use_qat) + self.assert_delegated(model, input_shape, mocker, request, use_qat=use_qat) @pytest.mark.parametrize( "input_shape, output_size", @@ -105,9 +107,9 @@ def test__qat(self, mocker, use_qat): pytest.param((2, 2, 3, 4), (24, 8), id="batch=2, scale_h=8, scale_w=2"), ], ) - def test__output_size(self, mocker, input_shape, output_size): + def test__output_size(self, mocker, request, input_shape, output_size): model = UpsampleNearestModule(size=output_size) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) def test__output_size__unsupported(self): input_shape = (1, 2, 3, 4) @@ -131,9 +133,9 @@ def test__output_size__unsupported(self): pytest.param((2, 2, 3, 4), (2, 8), id="batch=2, scale_h=2, scale_w=8"), ], ) - def test__scales(self, mocker, input_shape, scale): + def test__scales(self, mocker, request, input_shape, scale): model = UpsampleNearestModule(scale=scale) - self.assert_delegated(model, input_shape, mocker) + self.assert_delegated(model, input_shape, mocker, request) def test__scales__unsupported(self): input_shape = (1, 2, 3, 4) @@ -147,7 +149,7 @@ def test__noop__alone_in_partition__not_delegated(self): model = UpsampleNearestModule(scale=scale) self.assert_not_delegated(model, input_shape) - def test__noop__not_alone_in_partition__delegated(self, mocker): + def test__noop__not_alone_in_partition__delegated(self, mocker, request): input_shape = (1, 2, 3, 4) scale = 1 model = UpsampleNearestAddModule(scale=scale) @@ -155,5 +157,6 @@ def test__noop__not_alone_in_partition__delegated(self, mocker): model, input_shape, mocker, + request, expected_delegated_ops={UpsampleNearest2D: 1, AddTensor: 1}, ) diff --git a/backends/nxp/tests/model_output_comparator.py b/backends/nxp/tests/model_output_comparator.py index f0dd7cd2d60..5563703ae20 100644 --- a/backends/nxp/tests/model_output_comparator.py +++ b/backends/nxp/tests/model_output_comparator.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. import abc +import logging import os from abc import abstractmethod from pathlib import Path @@ -15,6 +16,7 @@ from executorch.backends.nxp.backend.ir.converter.conversion.translator import ( torch_type_to_numpy_type, ) +from executorch.backends.nxp.tests.utils import archive_test_dir, store_txt_input_tensor class BaseOutputComparator(abc.ABC): @@ -35,6 +37,11 @@ def compare_results(self, cpu_results_dir, npu_results_dir, output_tensor_spec): :param npu_results_dir: Path to directory with NPU (delegated) results. :param output_tensor_spec: List of output tensor specifications. """ + if logging.root.isEnabledFor(logging.DEBUG): + diff_cpu_npu_results_dir = os.path.join( + os.path.dirname(cpu_results_dir), "diff_cpu_npu_results" + ) + sample_dirs = [ os.path.join(cpu_results_dir, file) for file in os.listdir(cpu_results_dir) ] @@ -65,7 +72,28 @@ def compare_results(self, cpu_results_dir, npu_results_dir, output_tensor_spec): ) npu_output_tensors.append((output_tensor_name, npu_tensor)) - self.compare_sample(sample_dir, cpu_output_tensors, npu_output_tensors) + if logging.root.isEnabledFor(logging.DEBUG): + # Store diff results if logging level is enabled + diff_cpu_npu_tensor = np.abs(cpu_tensor - npu_tensor) + os.makedirs( + os.path.join(diff_cpu_npu_results_dir, sample_dir), + exist_ok=True, + ) + diff_cpu_npu_tensor_path = os.path.join( + diff_cpu_npu_results_dir, sample_dir, output_tensor_name + ) + diff_cpu_npu_tensor.tofile(diff_cpu_npu_tensor_path) + + # Store text tensor results + store_txt_input_tensor(cpu_tensor_path, tensor_spec) + store_txt_input_tensor(npu_tensor_path, tensor_spec) + store_txt_input_tensor(diff_cpu_npu_tensor_path, tensor_spec) + + # We need to archive the test_dir before comparison, as comparison can cause AssertionError exception + test_dir = os.path.dirname(cpu_results_dir) + if logging.root.isEnabledFor(logging.DEBUG): + archive_test_dir(test_dir) + self.compare_sample(sample_dir, cpu_output_tensors, npu_output_tensors) @abstractmethod def compare_sample( diff --git a/backends/nxp/tests/nsys_testing.py b/backends/nxp/tests/nsys_testing.py index 7631ee20ca1..a69bb66e969 100644 --- a/backends/nxp/tests/nsys_testing.py +++ b/backends/nxp/tests/nsys_testing.py @@ -3,19 +3,22 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import datetime import functools -import inspect import logging import os.path +import re import shutil import subprocess from copy import deepcopy from enum import Enum +from importlib.metadata import version from os import environ, mkdir from typing import Callable, Iterable import numpy as np import torch +import yaml from executorch.backends.nxp.backend.edge_helper import is_channels_last_dim_order from executorch.backends.nxp.backend.ir.converter.conversion import translator from executorch.backends.nxp.backend.ir.converter.conversion.translator import ( @@ -40,10 +43,11 @@ AllCloseOutputComparator, ) from executorch.backends.nxp.tests.outputs_dir_importer import outputs_dir -from executorch.backends.nxp.tests.utils import save_pte_program +from executorch.backends.nxp.tests.utils import save_pte_program, store_txt_input_tensor from executorch.devtools.visualization.visualization_utils import ( visualize_with_clusters, ) +from pytest import FixtureRequest from pytest_mock import MockerFixture from torch.export import ExportedProgram from torch.fx import GraphModule @@ -55,6 +59,7 @@ NSYS_CONFIG_PATH = test_config.NSYS_CONFIG_PATH NSYS_FIRMWARE_PATH = test_config.NSYS_FIRMWARE_PATH NEUTRON_TEST_PATH = test_config.NEUTRON_TEST_PATH +PROJECT_DIR = test_config.PROJECT_DIR class ReferenceModel(Enum): @@ -119,6 +124,7 @@ def wrapper(*args, **kwargs): delegated_program = to_quantized_executorch_program( model, input_spec, + test_dir=test_dir, dataset_dir=calibration_dataset_dir, delegate_to_npu=True, use_qat=use_qat, @@ -126,6 +132,7 @@ def wrapper(*args, **kwargs): operators_not_to_delegate=operators_not_to_delegate, remove_quant_io_ops=remove_quant_io_ops, ) + except RuntimeError as e: if "Model converted with neutron-converter has" in str(e) and hasattr( dlg_model_verifier, "check_num_delegated_nodes" @@ -391,6 +398,7 @@ def lower_run_compare( model: torch.nn.Module, input_spec: Iterable[ModelInputSpec] | tuple[int, ...], dlg_model_verifier: GraphVerifier, + request: FixtureRequest, dataset_creator=None, output_comparator=None, mocker: MockerFixture = None, @@ -408,11 +416,12 @@ def lower_run_compare( :param model: Executed PyTorch model. :param input_spec: Model input specification. Can be either tuple of ints - single float32 input model - or Iterable of ModelInputSpec. + :param dlg_model_verifier: Graph verifier instance. + :param request: PyTest request needed for correct test name extraction. :param dataset_creator: Creator that should fill provided `dataset_dir` with model input samples. :param output_comparator: Comparator of results produced by NPU and CPU runs of the program. - :param dlg_model_verifier: Graph verifier instance. - :param reference_model: Version of the model which will be run to obtain reference output data. :param mocker: Mocker instance used by visualizer. + :param reference_model: Version of the model which will be run to obtain reference output data. :param use_qat: If True, applies quantization-aware training before conversion (without the QAT training). :param train_fn: Train/finetune function for QAT training. Is used only when `use_qat=True`. :param operators_not_to_delegate: list of operators not to delegate. @@ -430,7 +439,7 @@ def lower_run_compare( model_to_delegate = model model_to_not_delegate = deepcopy(model) - test_name = _get_caller_name() + test_name = get_test_name(request) test_dir = os.path.join(OUTPUTS_DIR, test_name) shutil.rmtree(test_dir, ignore_errors=True) @@ -538,6 +547,11 @@ def lower_run_compare( output_tensor_spec = _get_program_output_spec(delegated_program) + if logging.root.isEnabledFor(logging.DEBUG): + _generate_txt_test_data( + calibration_dataset_dir, testing_dataset_dir, list(input_spec) + ) + dump_debug_test_summary(test_name, test_dir) npu_results_dir = os.path.join(test_dir, "results_npu") cpu_results_dir = os.path.join(test_dir, "results_cpu") output_comparator.compare_results( @@ -549,10 +563,12 @@ def lower_run_compare_ptq_qat( model: torch.nn.Module, input_spec: list[ModelInputSpec] | tuple, dlg_model_verifier: GraphVerifier, + request: FixtureRequest, train_fn: Callable[[torch.fx.GraphModule], None], dataset_creator=None, output_comparator=None, mocker: MockerFixture = None, + operators_not_to_delegate: list[str] = None, ): """ Run provided program twice and compare it's results. @@ -562,10 +578,12 @@ def lower_run_compare_ptq_qat( :param input_spec: Model input specification. Can be either tuple - single float32 input model - or list of ModelInputSpec. :param dlg_model_verifier: Graph verifier instance. + :param request: PyTest request needed for correct test name extraction. :param train_fn: Train/finetune function for QAT training. :param dataset_creator: Creator that should fill provided `dataset_dir` with model input samples. :param output_comparator: Comparator of results produced by NPU and CPU runs of the program. :param mocker: Mocker instance used by visualizer. + :param operators_not_to_delegate: list of operators not to delegate. """ assert_NSYS() @@ -577,7 +595,7 @@ def lower_run_compare_ptq_qat( model_ptq = model model_qat = deepcopy(model) - test_name = _get_caller_name() + test_name = get_test_name(request) test_dir = os.path.join(OUTPUTS_DIR, test_name) shutil.rmtree(test_dir, ignore_errors=True) @@ -606,6 +624,7 @@ def lower_run_compare_ptq_qat( ptq_results_dir, mocker, use_qat=False, + operators_not_to_delegate=operators_not_to_delegate, ) _ = _run_delegated_executorch_program( @@ -620,10 +639,14 @@ def lower_run_compare_ptq_qat( mocker, use_qat=True, train_fn=train_fn, + operators_not_to_delegate=operators_not_to_delegate, ) output_tensor_spec = _get_program_output_spec(delegated_program_ptq) + if logging.root.isEnabledFor(logging.DEBUG): + dump_debug_test_summary(test_name, test_dir) + shutil.make_archive(test_dir, "zip", test_dir) ptq_results_dir = os.path.join(test_dir, "results_ptq") qat_results_dir = os.path.join(test_dir, "results_qat") output_comparator.compare_results( @@ -657,13 +680,13 @@ def _parse_input_quant_params( return q_params -def _get_caller_name(): - test_function_names = ["lower_run_compare", "lower_run_compare_ptq_qat"] - for idx, frame in enumerate(inspect.stack()): - if frame.function in test_function_names: - # Look one index above to get caller - return inspect.stack()[idx + 1].function - return None +def get_test_name(request): + # PyTest request is available, extract correct name including test class and params + test_name = request.node.nodeid.lstrip(":") + # Escape unacceptable characters from test name to make sure it is a valid filesystem directory name + test_name = re.sub(r'[<>:"/\\|?* ,()`]', "_", test_name) + test_name = test_name.strip(" .") + return test_name def execute_cmd(cmd, cwd="."): @@ -725,3 +748,60 @@ def _get_program_output_spec(exported_program) -> list[torch.Tensor]: output_tensors_spec = list(exported_program.graph.output_node().meta["val"]) return output_tensors_spec + + +def get_executorch_git_info() -> dict[str, str]: + git_branch_cmd = f"git -C {PROJECT_DIR} branch --show-current" + git_branch, _, _ = execute_cmd(git_branch_cmd) + git_commit_cmd = f"git -C {PROJECT_DIR} rev-parse --short HEAD" + git_commit, _, _ = execute_cmd(git_commit_cmd) + return {"git_branch": git_branch, "git_commit": git_commit} + + +def dump_debug_test_summary(test_name: str, test_dir: str): + git_info = get_executorch_git_info() + + summary = { + "test_name": test_name, + "date_time": datetime.datetime.now().isoformat(), + "git_branch": git_info["git_branch"], + "git_commit": git_info["git_commit"], + "eiq_neutron_sdk_version": version("eiq_neutron_sdk"), + "eiq_nsys_version": version("eiq_nsys"), + } + with open(os.path.join(test_dir, "summary.yaml"), "w") as f: + yaml.dump(summary, f) + + +def _generate_txt_test_data( + calibration_dataset_dir: str, + testing_dataset_dir: str, + input_tensor_spec: list[ModelInputSpec], +): + # Generates txt tensor variants for input datasets + # Testing dataset can point to calibration dataset + dataset_paths = ( + [calibration_dataset_dir, testing_dataset_dir] + if calibration_dataset_dir != testing_dataset_dir + else [testing_dataset_dir] + ) + for d_path in dataset_paths: + quant_dataset = d_path.endswith("dataset_quant") + + # For multiple input tests, list each sample dir, for single input tests the input files are in d_path + sample_dirs = [os.path.join(d_path, file) for file in os.listdir(d_path)] + sample_dirs = [file for file in sample_dirs if os.path.isdir(file)] + # Single input dataset has tensor directly in dataset path + if len(sample_dirs) == 0: + for input_tensor_name in sorted(os.listdir(d_path)): + input_tensor_path = os.path.join(d_path, input_tensor_name) + tensor_spec = input_tensor_spec[0] + store_txt_input_tensor(input_tensor_path, tensor_spec, quant_dataset) + else: + for sample_dir in sample_dirs: + for idx, input_tensor_name in enumerate(os.listdir(sample_dir)): + input_tensor_path = os.path.join(sample_dir, input_tensor_name) + tensor_spec = input_tensor_spec[idx] + store_txt_input_tensor( + input_tensor_path, tensor_spec, quant_dataset + ) diff --git a/backends/nxp/tests/utils.py b/backends/nxp/tests/utils.py index c210d9db8bc..00b7c364a31 100644 --- a/backends/nxp/tests/utils.py +++ b/backends/nxp/tests/utils.py @@ -7,11 +7,19 @@ import logging import os +import shutil +import numpy as np + +from executorch.backends.nxp.backend.ir.converter.conversion.translator import ( + torch_type_to_numpy_type, +) +from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec from executorch.devtools.visualization.visualization_utils import ( visualize_with_clusters, ) from executorch.exir import ExecutorchProgramManager +from torch._subclasses import FakeTensor def save_pte_program( @@ -32,3 +40,27 @@ def save_pte_program( visualize_with_clusters(prog.exported_program(), visualize_file_name, False) return filename + + +def change_filepath_extension(path: str, extension: str) -> str: + base, _ = os.path.splitext(path) + return base + "." + extension + + +def store_txt_input_tensor( + input_tensor_path: str, + tensor_spec: ModelInputSpec | FakeTensor, + quant_dataset: bool = False, +): + dtype = np.int8 if quant_dataset else torch_type_to_numpy_type(tensor_spec.dtype) + input_tensor = np.fromfile(input_tensor_path, dtype=dtype) + int__max = np.iinfo(np.int32).max + + with open(change_filepath_extension(input_tensor_path, "txt"), "w") as f: + f.write("Flattened tensor shape:" + str(input_tensor.shape)) + f.write("\nOriginal tensor shape:" + str(list(tensor_spec.shape)) + "\n") + f.write(np.array2string(input_tensor, threshold=int__max)) + + +def archive_test_dir(test_dir: str): + shutil.make_archive(test_dir, "zip", test_dir)